xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 4f89e4b8)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69 
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76 
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83 
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90 
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98 
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106 
107 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
108 {
109 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
110 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
111 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
112 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
113 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
114 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
115 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
116 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
117 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
118 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
119 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
120 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
121 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
122 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
123 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
124 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
125 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
126 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
127 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
128 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
129 };
130 
131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
132 {
133 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
134 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
135 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
136 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
137 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
138 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
139 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
140 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
141 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
142 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
143 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
144 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
145 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
146 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
147 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
148 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
149 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
150 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
151 };
152 
153 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
154 {
155 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
156 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
157 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
158 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
159 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
160 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
161 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
162 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
163 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
164 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
165 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
166 };
167 
168 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
169 {
170 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
171 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
172 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
173 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
174 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
175 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
176 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
177 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
178 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
179 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
180 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
181 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
182 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
183 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
184 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
185 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
186 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
187 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
188 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
189 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
190 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
191 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
192 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
193 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
194 };
195 
196 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
197 {
198 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
199 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
200 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
201 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
202 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
203 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
204 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
205 };
206 
207 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
208 {
209 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
210 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
211 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
212 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
213 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
214 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
215 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
216 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
217 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
218 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
219 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
220 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
221 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
222 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
223 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
224 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
225 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
226 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
227 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
228 };
229 
230 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
231 {
232 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
233 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
234 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
235 };
236 
237 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
238 {
239 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
240 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
241 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
242 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
243 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
244 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
245 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
246 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
247 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
248 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
249 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
250 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
251 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
252 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
253 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
254 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
255 };
256 
257 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
258 {
259 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
260 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
261 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
262 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
263 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
264 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
265 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
266 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
267 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
268 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
269 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
270 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
271 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
272 };
273 
274 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
275 {
276 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
279 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
280 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
281 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
282 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
283 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
284 };
285 
286 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
287 {
288 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
291 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
292 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
293 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
294 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
295 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
296 };
297 
298 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
299 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
300 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
301 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
302 
303 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
304 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
306 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
307 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
308                                  struct amdgpu_cu_info *cu_info);
309 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
310 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
311 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
312 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
313 
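/*
 * Apply the per-ASIC "golden" register sequences defined above, followed by
 * the settings common to all GFX9 parts.  The Vega10 sequences are skipped
 * when amdgpu_virt_support_skip_setting() indicates the host has already
 * programmed them for this virtual function.
 */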
314 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
315 {
316 	switch (adev->asic_type) {
317 	case CHIP_VEGA10:
318 		if (!amdgpu_virt_support_skip_setting(adev)) {
319 			soc15_program_register_sequence(adev,
320 							 golden_settings_gc_9_0,
321 							 ARRAY_SIZE(golden_settings_gc_9_0));
322 			soc15_program_register_sequence(adev,
323 							 golden_settings_gc_9_0_vg10,
324 							 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
325 		}
326 		break;
327 	case CHIP_VEGA12:
328 		soc15_program_register_sequence(adev,
329 						golden_settings_gc_9_2_1,
330 						ARRAY_SIZE(golden_settings_gc_9_2_1));
331 		soc15_program_register_sequence(adev,
332 						golden_settings_gc_9_2_1_vg12,
333 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
334 		break;
335 	case CHIP_VEGA20:
336 		soc15_program_register_sequence(adev,
337 						golden_settings_gc_9_0,
338 						ARRAY_SIZE(golden_settings_gc_9_0));
339 		soc15_program_register_sequence(adev,
340 						golden_settings_gc_9_0_vg20,
341 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
342 		break;
343 	case CHIP_RAVEN:
344 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
345 						ARRAY_SIZE(golden_settings_gc_9_1));
346 		if (adev->rev_id >= 8)
347 			soc15_program_register_sequence(adev,
348 							golden_settings_gc_9_1_rv2,
349 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
350 		else
351 			soc15_program_register_sequence(adev,
352 							golden_settings_gc_9_1_rv1,
353 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
354 		break;
355 	default:
356 		break;
357 	}
358 
359 	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
360 					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
361 }
362 
363 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
364 {
365 	adev->gfx.scratch.num_reg = 8;
366 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
367 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
368 }
369 
370 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
371 				       bool wc, uint32_t reg, uint32_t val)
372 {
373 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
374 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
375 				WRITE_DATA_DST_SEL(0) |
376 				(wc ? WR_CONFIRM : 0));
377 	amdgpu_ring_write(ring, reg);
378 	amdgpu_ring_write(ring, 0);
379 	amdgpu_ring_write(ring, val);
380 }
381 
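/*
 * Emit a WAIT_REG_MEM packet that polls either a register (mem_space == 0)
 * or a memory location (mem_space == 1) until (value & mask) == ref, using
 * the "equal" compare function and 'inv' as the poll interval.
 */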
382 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
383 				  int mem_space, int opt, uint32_t addr0,
384 				  uint32_t addr1, uint32_t ref, uint32_t mask,
385 				  uint32_t inv)
386 {
387 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
388 	amdgpu_ring_write(ring,
389 				 /* memory (1) or register (0) */
390 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
391 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
392 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
393 				 WAIT_REG_MEM_ENGINE(eng_sel)));
394 
395 	if (mem_space)
396 		BUG_ON(addr0 & 0x3); /* Dword align */
397 	amdgpu_ring_write(ring, addr0);
398 	amdgpu_ring_write(ring, addr1);
399 	amdgpu_ring_write(ring, ref);
400 	amdgpu_ring_write(ring, mask);
401 	amdgpu_ring_write(ring, inv); /* poll interval */
402 }
403 
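/*
 * Basic ring sanity test: write 0xDEADBEEF to a scratch register through the
 * ring and poll until the value reads back or the timeout expires.
 */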
404 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
405 {
406 	struct amdgpu_device *adev = ring->adev;
407 	uint32_t scratch;
408 	uint32_t tmp = 0;
409 	unsigned i;
410 	int r;
411 
412 	r = amdgpu_gfx_scratch_get(adev, &scratch);
413 	if (r)
414 		return r;
415 
416 	WREG32(scratch, 0xCAFEDEAD);
417 	r = amdgpu_ring_alloc(ring, 3);
418 	if (r)
419 		goto error_free_scratch;
420 
421 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
422 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
423 	amdgpu_ring_write(ring, 0xDEADBEEF);
424 	amdgpu_ring_commit(ring);
425 
426 	for (i = 0; i < adev->usec_timeout; i++) {
427 		tmp = RREG32(scratch);
428 		if (tmp == 0xDEADBEEF)
429 			break;
430 		udelay(1);
431 	}
432 
433 	if (i >= adev->usec_timeout)
434 		r = -ETIMEDOUT;
435 
436 error_free_scratch:
437 	amdgpu_gfx_scratch_free(adev, scratch);
438 	return r;
439 }
440 
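/*
 * Indirect buffer test: submit a small IB that writes 0xDEADBEEF to a
 * writeback slot and wait on its fence to confirm the CP can execute IBs.
 */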
441 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
442 {
443 	struct amdgpu_device *adev = ring->adev;
444 	struct amdgpu_ib ib;
445 	struct dma_fence *f = NULL;
446 
447 	unsigned index;
448 	uint64_t gpu_addr;
449 	uint32_t tmp;
450 	long r;
451 
452 	r = amdgpu_device_wb_get(adev, &index);
453 	if (r)
454 		return r;
455 
456 	gpu_addr = adev->wb.gpu_addr + (index * 4);
457 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
458 	memset(&ib, 0, sizeof(ib));
459 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
460 	if (r)
461 		goto err1;
462 
463 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
464 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
465 	ib.ptr[2] = lower_32_bits(gpu_addr);
466 	ib.ptr[3] = upper_32_bits(gpu_addr);
467 	ib.ptr[4] = 0xDEADBEEF;
468 	ib.length_dw = 5;
469 
470 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
471 	if (r)
472 		goto err2;
473 
474 	r = dma_fence_wait_timeout(f, false, timeout);
475 	if (r == 0) {
476 		r = -ETIMEDOUT;
477 		goto err2;
478 	} else if (r < 0) {
479 		goto err2;
480 	}
481 
482 	tmp = adev->wb.wb[index];
483 	if (tmp == 0xDEADBEEF)
484 		r = 0;
485 	else
486 		r = -EINVAL;
487 
488 err2:
489 	amdgpu_ib_free(adev, &ib, NULL);
490 	dma_fence_put(f);
491 err1:
492 	amdgpu_device_wb_free(adev, index);
493 	return r;
494 }
495 
496 
497 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
498 {
499 	release_firmware(adev->gfx.pfp_fw);
500 	adev->gfx.pfp_fw = NULL;
501 	release_firmware(adev->gfx.me_fw);
502 	adev->gfx.me_fw = NULL;
503 	release_firmware(adev->gfx.ce_fw);
504 	adev->gfx.ce_fw = NULL;
505 	release_firmware(adev->gfx.rlc_fw);
506 	adev->gfx.rlc_fw = NULL;
507 	release_firmware(adev->gfx.mec_fw);
508 	adev->gfx.mec_fw = NULL;
509 	release_firmware(adev->gfx.mec2_fw);
510 	adev->gfx.mec2_fw = NULL;
511 
512 	kfree(adev->gfx.rlc.register_list_format);
513 }
514 
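/*
 * Parse the RLC v2.1 firmware header for the save/restore list CNTL, GPM and
 * SRM blobs: record their versions, sizes and data pointers in adev->gfx.
 */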
515 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
516 {
517 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
518 
519 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
520 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
521 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
522 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
523 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
524 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
525 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
526 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
527 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
528 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
529 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
530 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
531 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
532 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
533 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
534 }
535 
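/*
 * Record whether the loaded ME/PFP and MEC firmware on this ASIC are new
 * enough to handle combined register write-then-wait packets; the ring emit
 * code uses these flags to pick the fast path.
 */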
536 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
537 {
538 	adev->gfx.me_fw_write_wait = false;
539 	adev->gfx.mec_fw_write_wait = false;
540 
541 	switch (adev->asic_type) {
542 	case CHIP_VEGA10:
543 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
544 		    (adev->gfx.me_feature_version >= 42) &&
545 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
546 		    (adev->gfx.pfp_feature_version >= 42))
547 			adev->gfx.me_fw_write_wait = true;
548 
549 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
550 		    (adev->gfx.mec_feature_version >= 42))
551 			adev->gfx.mec_fw_write_wait = true;
552 		break;
553 	case CHIP_VEGA12:
554 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
555 		    (adev->gfx.me_feature_version >= 44) &&
556 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
557 		    (adev->gfx.pfp_feature_version >= 44))
558 			adev->gfx.me_fw_write_wait = true;
559 
560 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
561 		    (adev->gfx.mec_feature_version >= 44))
562 			adev->gfx.mec_fw_write_wait = true;
563 		break;
564 	case CHIP_VEGA20:
565 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
566 		    (adev->gfx.me_feature_version >= 44) &&
567 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
568 		    (adev->gfx.pfp_feature_version >= 44))
569 			adev->gfx.me_fw_write_wait = true;
570 
571 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
572 		    (adev->gfx.mec_feature_version >= 44))
573 			adev->gfx.mec_fw_write_wait = true;
574 		break;
575 	case CHIP_RAVEN:
576 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
577 		    (adev->gfx.me_feature_version >= 42) &&
578 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
579 		    (adev->gfx.pfp_feature_version >= 42))
580 			adev->gfx.me_fw_write_wait = true;
581 
582 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
583 		    (adev->gfx.mec_feature_version >= 42))
584 			adev->gfx.mec_fw_write_wait = true;
585 		break;
586 	default:
587 		break;
588 	}
589 }
590 
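/*
 * GFXOFF on original Raven (not Raven2 or Picasso) requires a suitable RLC
 * firmware; clear the PP_GFXOFF feature when the RLC ucode is too old or is
 * not the v2.1 layout.
 */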
591 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
592 {
593 	switch (adev->asic_type) {
594 	case CHIP_VEGA10:
595 	case CHIP_VEGA12:
596 	case CHIP_VEGA20:
597 		break;
598 	case CHIP_RAVEN:
599 		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
600 			break;
601 		if ((adev->gfx.rlc_fw_version != 106 &&
602 		     adev->gfx.rlc_fw_version < 531) ||
603 		    (adev->gfx.rlc_fw_version == 53815) ||
604 		    (adev->gfx.rlc_feature_version < 1) ||
605 		    !adev->gfx.rlc.is_rlc_v2_1)
606 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
607 		break;
608 	default:
609 		break;
610 	}
611 }
612 
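/*
 * Request and validate the PFP, ME, CE, RLC, MEC and (optional) MEC2
 * firmware images for the detected chip, record their version fields, and,
 * when PSP front-door loading is used, register each ucode so its size is
 * accounted for in the firmware framework.
 */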
613 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
614 {
615 	const char *chip_name;
616 	char fw_name[30];
617 	int err;
618 	struct amdgpu_firmware_info *info = NULL;
619 	const struct common_firmware_header *header = NULL;
620 	const struct gfx_firmware_header_v1_0 *cp_hdr;
621 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
622 	unsigned int *tmp = NULL;
623 	unsigned int i = 0;
624 	uint16_t version_major;
625 	uint16_t version_minor;
626 	uint32_t smu_version;
627 
628 	DRM_DEBUG("\n");
629 
630 	switch (adev->asic_type) {
631 	case CHIP_VEGA10:
632 		chip_name = "vega10";
633 		break;
634 	case CHIP_VEGA12:
635 		chip_name = "vega12";
636 		break;
637 	case CHIP_VEGA20:
638 		chip_name = "vega20";
639 		break;
640 	case CHIP_RAVEN:
641 		if (adev->rev_id >= 8)
642 			chip_name = "raven2";
643 		else if (adev->pdev->device == 0x15d8)
644 			chip_name = "picasso";
645 		else
646 			chip_name = "raven";
647 		break;
648 	default:
649 		BUG();
650 	}
651 
652 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
653 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
654 	if (err)
655 		goto out;
656 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
657 	if (err)
658 		goto out;
659 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
660 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
661 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
662 
663 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
664 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
665 	if (err)
666 		goto out;
667 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
668 	if (err)
669 		goto out;
670 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
671 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
672 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
673 
674 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
675 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
676 	if (err)
677 		goto out;
678 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
679 	if (err)
680 		goto out;
681 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
682 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
683 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
684 
685 	/*
686 	 * For Picasso on an AM4-socket board, use picasso_rlc_am4.bin
687 	 * instead of picasso_rlc.bin.
688 	 * Detection method:
689 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
690 	 *          or revision >= 0xD8 && revision <= 0xDF
691 	 * otherwise the part is PCO FP5.
692 	 */
693 	if (!strcmp(chip_name, "picasso") &&
694 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
695 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
696 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
697 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
698 		(smu_version >= 0x41e2b))
699 		/*
700 		 * SMC is loaded by SBIOS on APUs, so the SMU version can be read directly.
701 		 */
702 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
703 	else
704 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
705 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
706 	if (err)
707 		goto out;
708 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
709 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
710 
711 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
712 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
713 	if (version_major == 2 && version_minor == 1)
714 		adev->gfx.rlc.is_rlc_v2_1 = true;
715 
716 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
717 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
718 	adev->gfx.rlc.save_and_restore_offset =
719 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
720 	adev->gfx.rlc.clear_state_descriptor_offset =
721 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
722 	adev->gfx.rlc.avail_scratch_ram_locations =
723 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
724 	adev->gfx.rlc.reg_restore_list_size =
725 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
726 	adev->gfx.rlc.reg_list_format_start =
727 			le32_to_cpu(rlc_hdr->reg_list_format_start);
728 	adev->gfx.rlc.reg_list_format_separate_start =
729 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
730 	adev->gfx.rlc.starting_offsets_start =
731 			le32_to_cpu(rlc_hdr->starting_offsets_start);
732 	adev->gfx.rlc.reg_list_format_size_bytes =
733 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
734 	adev->gfx.rlc.reg_list_size_bytes =
735 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
736 	adev->gfx.rlc.register_list_format =
737 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
738 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
739 	if (!adev->gfx.rlc.register_list_format) {
740 		err = -ENOMEM;
741 		goto out;
742 	}
743 
744 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
745 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
746 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
747 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
748 
749 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
750 
751 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
752 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
753 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
754 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
755 
756 	if (adev->gfx.rlc.is_rlc_v2_1)
757 		gfx_v9_0_init_rlc_ext_microcode(adev);
758 
759 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
760 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
761 	if (err)
762 		goto out;
763 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
764 	if (err)
765 		goto out;
766 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
767 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
768 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
769 
770 
771 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
772 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
773 	if (!err) {
774 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
775 		if (err)
776 			goto out;
777 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
778 		adev->gfx.mec2_fw->data;
779 		adev->gfx.mec2_fw_version =
780 		le32_to_cpu(cp_hdr->header.ucode_version);
781 		adev->gfx.mec2_feature_version =
782 		le32_to_cpu(cp_hdr->ucode_feature_version);
783 	} else {
784 		err = 0;
785 		adev->gfx.mec2_fw = NULL;
786 	}
787 
788 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
789 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
790 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
791 		info->fw = adev->gfx.pfp_fw;
792 		header = (const struct common_firmware_header *)info->fw->data;
793 		adev->firmware.fw_size +=
794 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
795 
796 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
797 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
798 		info->fw = adev->gfx.me_fw;
799 		header = (const struct common_firmware_header *)info->fw->data;
800 		adev->firmware.fw_size +=
801 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
802 
803 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
804 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
805 		info->fw = adev->gfx.ce_fw;
806 		header = (const struct common_firmware_header *)info->fw->data;
807 		adev->firmware.fw_size +=
808 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
809 
810 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
811 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
812 		info->fw = adev->gfx.rlc_fw;
813 		header = (const struct common_firmware_header *)info->fw->data;
814 		adev->firmware.fw_size +=
815 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
816 
817 		if (adev->gfx.rlc.is_rlc_v2_1 &&
818 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
819 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
820 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
821 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
822 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
823 			info->fw = adev->gfx.rlc_fw;
824 			adev->firmware.fw_size +=
825 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
826 
827 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
828 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
829 			info->fw = adev->gfx.rlc_fw;
830 			adev->firmware.fw_size +=
831 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
832 
833 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
834 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
835 			info->fw = adev->gfx.rlc_fw;
836 			adev->firmware.fw_size +=
837 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
838 		}
839 
840 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
841 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
842 		info->fw = adev->gfx.mec_fw;
843 		header = (const struct common_firmware_header *)info->fw->data;
844 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
845 		adev->firmware.fw_size +=
846 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
847 
848 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
849 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
850 		info->fw = adev->gfx.mec_fw;
851 		adev->firmware.fw_size +=
852 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
853 
854 		if (adev->gfx.mec2_fw) {
855 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
856 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
857 			info->fw = adev->gfx.mec2_fw;
858 			header = (const struct common_firmware_header *)info->fw->data;
859 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
860 			adev->firmware.fw_size +=
861 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
862 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
863 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
864 			info->fw = adev->gfx.mec2_fw;
865 			adev->firmware.fw_size +=
866 				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
867 		}
868 
869 	}
870 
871 out:
872 	gfx_v9_0_check_if_need_gfxoff(adev);
873 	gfx_v9_0_check_fw_write_wait(adev);
874 	if (err) {
875 		dev_err(adev->dev,
876 			"gfx9: Failed to load firmware \"%s\"\n",
877 			fw_name);
878 		release_firmware(adev->gfx.pfp_fw);
879 		adev->gfx.pfp_fw = NULL;
880 		release_firmware(adev->gfx.me_fw);
881 		adev->gfx.me_fw = NULL;
882 		release_firmware(adev->gfx.ce_fw);
883 		adev->gfx.ce_fw = NULL;
884 		release_firmware(adev->gfx.rlc_fw);
885 		adev->gfx.rlc_fw = NULL;
886 		release_firmware(adev->gfx.mec_fw);
887 		adev->gfx.mec_fw = NULL;
888 		release_firmware(adev->gfx.mec2_fw);
889 		adev->gfx.mec2_fw = NULL;
890 	}
891 	return err;
892 }
893 
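/* Count the dwords needed for the clear-state indirect buffer (CSB). */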
894 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
895 {
896 	u32 count = 0;
897 	const struct cs_section_def *sect = NULL;
898 	const struct cs_extent_def *ext = NULL;
899 
900 	/* begin clear state */
901 	count += 2;
902 	/* context control state */
903 	count += 3;
904 
905 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
906 		for (ext = sect->section; ext->extent != NULL; ++ext) {
907 			if (sect->id == SECT_CONTEXT)
908 				count += 2 + ext->reg_count;
909 			else
910 				return 0;
911 		}
912 	}
913 
914 	/* end clear state */
915 	count += 2;
916 	/* clear state */
917 	count += 2;
918 
919 	return count;
920 }
921 
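/*
 * Build the clear-state buffer: preamble begin, context control, the
 * SECT_CONTEXT register extents from the RLC cs_data, preamble end and a
 * final CLEAR_STATE packet.
 */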
922 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
923 				    volatile u32 *buffer)
924 {
925 	u32 count = 0, i;
926 	const struct cs_section_def *sect = NULL;
927 	const struct cs_extent_def *ext = NULL;
928 
929 	if (adev->gfx.rlc.cs_data == NULL)
930 		return;
931 	if (buffer == NULL)
932 		return;
933 
934 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
935 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
936 
937 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
938 	buffer[count++] = cpu_to_le32(0x80000000);
939 	buffer[count++] = cpu_to_le32(0x80000000);
940 
941 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
942 		for (ext = sect->section; ext->extent != NULL; ++ext) {
943 			if (sect->id == SECT_CONTEXT) {
944 				buffer[count++] =
945 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
946 				buffer[count++] = cpu_to_le32(ext->reg_index -
947 						PACKET3_SET_CONTEXT_REG_START);
948 				for (i = 0; i < ext->reg_count; i++)
949 					buffer[count++] = cpu_to_le32(ext->extent[i]);
950 			} else {
951 				return;
952 			}
953 		}
954 	}
955 
956 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
957 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
958 
959 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
960 	buffer[count++] = cpu_to_le32(0);
961 }
962 
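/*
 * Mark the first N present CUs of each SE/SH as "always on" (N depends on
 * the ASIC), program the RLC always-active CU masks and remember the bitmap
 * in adev->gfx.cu_info for later use.
 */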
963 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
964 {
965 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
966 	uint32_t pg_always_on_cu_num = 2;
967 	uint32_t always_on_cu_num;
968 	uint32_t i, j, k;
969 	uint32_t mask, cu_bitmap, counter;
970 
971 	if (adev->flags & AMD_IS_APU)
972 		always_on_cu_num = 4;
973 	else if (adev->asic_type == CHIP_VEGA12)
974 		always_on_cu_num = 8;
975 	else
976 		always_on_cu_num = 12;
977 
978 	mutex_lock(&adev->grbm_idx_mutex);
979 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
980 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
981 			mask = 1;
982 			cu_bitmap = 0;
983 			counter = 0;
984 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
985 
986 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
987 				if (cu_info->bitmap[i][j] & mask) {
988 					if (counter == pg_always_on_cu_num)
989 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
990 					if (counter < always_on_cu_num)
991 						cu_bitmap |= mask;
992 					else
993 						break;
994 					counter++;
995 				}
996 				mask <<= 1;
997 			}
998 
999 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1000 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1001 		}
1002 	}
1003 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1004 	mutex_unlock(&adev->grbm_idx_mutex);
1005 }
1006 
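/*
 * Program the RLC load-balancing (LBPW) thresholds, counters, parameters and
 * CU masks used on Raven.
 */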
1007 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1008 {
1009 	uint32_t data;
1010 
1011 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1012 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1013 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1014 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1015 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1016 
1017 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1018 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1019 
1020 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1021 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1022 
1023 	mutex_lock(&adev->grbm_idx_mutex);
1024 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1025 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1026 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1027 
1028 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1029 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1030 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1031 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1032 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1033 
1034 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1035 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1036 	data &= 0x0000FFFF;
1037 	data |= 0x00C00000;
1038 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1039 
1040 	/*
1041 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1042 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1043 	 */
1044 
1045 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1046 	 * but used for RLC_LB_CNTL configuration */
1047 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1048 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1049 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1050 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1051 	mutex_unlock(&adev->grbm_idx_mutex);
1052 
1053 	gfx_v9_0_init_always_on_cu_mask(adev);
1054 }
1055 
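/* Vega20 variant of the LBPW programming above, with different threshold and counter values. */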
1056 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1057 {
1058 	uint32_t data;
1059 
1060 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1061 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1062 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1063 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1064 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1065 
1066 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1067 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1068 
1069 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1070 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1071 
1072 	mutex_lock(&adev->grbm_idx_mutex);
1073 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1074 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1075 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1076 
1077 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1078 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1079 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1080 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1081 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1082 
1083 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1084 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1085 	data &= 0x0000FFFF;
1086 	data |= 0x00C00000;
1087 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1088 
1089 	/*
1090 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1091 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1092 	 */
1093 
1094 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1095 	 * but used for RLC_LB_CNTL configuration */
1096 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1097 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1098 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1099 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1100 	mutex_unlock(&adev->grbm_idx_mutex);
1101 
1102 	gfx_v9_0_init_always_on_cu_mask(adev);
1103 }
1104 
1105 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1106 {
1107 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1108 }
1109 
1110 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1111 {
1112 	return 5;
1113 }
1114 
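/*
 * RLC init: set up the clear-state data and CSB bo, allocate the cp table
 * on Raven, and program LBPW on the ASICs that use it.
 */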
1115 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1116 {
1117 	const struct cs_section_def *cs_data;
1118 	int r;
1119 
1120 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1121 
1122 	cs_data = adev->gfx.rlc.cs_data;
1123 
1124 	if (cs_data) {
1125 		/* init clear state block */
1126 		r = amdgpu_gfx_rlc_init_csb(adev);
1127 		if (r)
1128 			return r;
1129 	}
1130 
1131 	if (adev->asic_type == CHIP_RAVEN) {
1132 		/* TODO: double check the cp_table_size for RV */
1133 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1134 		r = amdgpu_gfx_rlc_init_cpt(adev);
1135 		if (r)
1136 			return r;
1137 	}
1138 
1139 	switch (adev->asic_type) {
1140 	case CHIP_RAVEN:
1141 		gfx_v9_0_init_lbpw(adev);
1142 		break;
1143 	case CHIP_VEGA20:
1144 		gfx_v9_4_init_lbpw(adev);
1145 		break;
1146 	default:
1147 		break;
1148 	}
1149 
1150 	return 0;
1151 }
1152 
1153 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1154 {
1155 	int r;
1156 
1157 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1158 	if (unlikely(r != 0))
1159 		return r;
1160 
1161 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1162 			AMDGPU_GEM_DOMAIN_VRAM);
1163 	if (!r)
1164 		adev->gfx.rlc.clear_state_gpu_addr =
1165 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1166 
1167 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1168 
1169 	return r;
1170 }
1171 
1172 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1173 {
1174 	int r;
1175 
1176 	if (!adev->gfx.rlc.clear_state_obj)
1177 		return;
1178 
1179 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1180 	if (likely(r == 0)) {
1181 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1182 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1183 	}
1184 }
1185 
1186 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1187 {
1188 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1189 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1190 }
1191 
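/*
 * Allocate the MEC HPD/EOP buffer in VRAM for the acquired compute queues
 * and copy the MEC firmware into a GTT bo for later upload.
 */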
1192 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1193 {
1194 	int r;
1195 	u32 *hpd;
1196 	const __le32 *fw_data;
1197 	unsigned fw_size;
1198 	u32 *fw;
1199 	size_t mec_hpd_size;
1200 
1201 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1202 
1203 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1204 
1205 	/* take ownership of the relevant compute queues */
1206 	amdgpu_gfx_compute_queue_acquire(adev);
1207 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1208 
1209 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1210 				      AMDGPU_GEM_DOMAIN_VRAM,
1211 				      &adev->gfx.mec.hpd_eop_obj,
1212 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1213 				      (void **)&hpd);
1214 	if (r) {
1215 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1216 		gfx_v9_0_mec_fini(adev);
1217 		return r;
1218 	}
1219 
1220 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1221 
1222 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1223 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1224 
1225 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1226 
1227 	fw_data = (const __le32 *)
1228 		(adev->gfx.mec_fw->data +
1229 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1230 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1231 
1232 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1233 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1234 				      &adev->gfx.mec.mec_fw_obj,
1235 				      &adev->gfx.mec.mec_fw_gpu_addr,
1236 				      (void **)&fw);
1237 	if (r) {
1238 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1239 		gfx_v9_0_mec_fini(adev);
1240 		return r;
1241 	}
1242 
1243 	memcpy(fw, fw_data, fw_size);
1244 
1245 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1246 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1247 
1248 	return 0;
1249 }
1250 
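/*
 * Read wave-state registers through the SQ_IND_INDEX/SQ_IND_DATA indirect
 * interface; used by the wave data/SGPR/VGPR readers below.
 */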
1251 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1252 {
1253 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1254 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1255 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1256 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1257 		(SQ_IND_INDEX__FORCE_READ_MASK));
1258 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1259 }
1260 
1261 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1262 			   uint32_t wave, uint32_t thread,
1263 			   uint32_t regno, uint32_t num, uint32_t *out)
1264 {
1265 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1266 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1267 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1268 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1269 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1270 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1271 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1272 	while (num--)
1273 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1274 }
1275 
1276 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1277 {
1278 	/* type 1 wave data */
1279 	dst[(*no_fields)++] = 1;
1280 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1281 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1282 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1283 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1284 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1285 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1286 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1287 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1288 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1289 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1290 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1291 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1292 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1293 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1294 }
1295 
1296 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1297 				     uint32_t wave, uint32_t start,
1298 				     uint32_t size, uint32_t *dst)
1299 {
1300 	wave_read_regs(
1301 		adev, simd, wave, 0,
1302 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1303 }
1304 
1305 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1306 				     uint32_t wave, uint32_t thread,
1307 				     uint32_t start, uint32_t size,
1308 				     uint32_t *dst)
1309 {
1310 	wave_read_regs(
1311 		adev, simd, wave, thread,
1312 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1313 }
1314 
1315 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1316 				  u32 me, u32 pipe, u32 q, u32 vm)
1317 {
1318 	soc15_grbm_select(adev, me, pipe, q, vm);
1319 }
1320 
1321 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1322 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1323 	.select_se_sh = &gfx_v9_0_select_se_sh,
1324 	.read_wave_data = &gfx_v9_0_read_wave_data,
1325 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1326 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1327 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1328 };
1329 
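/*
 * Set the per-ASIC gfx configuration defaults, pick GB_ADDR_CONFIG, and
 * decode its fields (pipes, banks, RBs, SEs, interleave size) into
 * adev->gfx.config.
 */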
1330 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1331 {
1332 	u32 gb_addr_config;
1333 	int err;
1334 
1335 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1336 
1337 	switch (adev->asic_type) {
1338 	case CHIP_VEGA10:
1339 		adev->gfx.config.max_hw_contexts = 8;
1340 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1344 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1345 		break;
1346 	case CHIP_VEGA12:
1347 		adev->gfx.config.max_hw_contexts = 8;
1348 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1349 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1350 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1351 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1352 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1353 		DRM_INFO("fix gfx.config for vega12\n");
1354 		break;
1355 	case CHIP_VEGA20:
1356 		adev->gfx.config.max_hw_contexts = 8;
1357 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1358 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1359 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1360 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1361 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1362 		gb_addr_config &= ~0xf3e777ff;
1363 		gb_addr_config |= 0x22014042;
1364 		/* check vbios table if gpu info is not available */
1365 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1366 		if (err)
1367 			return err;
1368 		break;
1369 	case CHIP_RAVEN:
1370 		adev->gfx.config.max_hw_contexts = 8;
1371 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1372 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1373 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1374 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1375 		if (adev->rev_id >= 8)
1376 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1377 		else
1378 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1379 		break;
1380 	default:
1381 		BUG();
1382 		break;
1383 	}
1384 
1385 	adev->gfx.config.gb_addr_config = gb_addr_config;
1386 
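	/* GB_ADDR_CONFIG fields are log2-encoded: expand with 1 << field (pipe interleave is 256 << field) */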
1387 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1388 			REG_GET_FIELD(
1389 					adev->gfx.config.gb_addr_config,
1390 					GB_ADDR_CONFIG,
1391 					NUM_PIPES);
1392 
1393 	adev->gfx.config.max_tile_pipes =
1394 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1395 
1396 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1397 			REG_GET_FIELD(
1398 					adev->gfx.config.gb_addr_config,
1399 					GB_ADDR_CONFIG,
1400 					NUM_BANKS);
1401 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1402 			REG_GET_FIELD(
1403 					adev->gfx.config.gb_addr_config,
1404 					GB_ADDR_CONFIG,
1405 					MAX_COMPRESSED_FRAGS);
1406 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1407 			REG_GET_FIELD(
1408 					adev->gfx.config.gb_addr_config,
1409 					GB_ADDR_CONFIG,
1410 					NUM_RB_PER_SE);
1411 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1412 			REG_GET_FIELD(
1413 					adev->gfx.config.gb_addr_config,
1414 					GB_ADDR_CONFIG,
1415 					NUM_SHADER_ENGINES);
1416 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1417 			REG_GET_FIELD(
1418 					adev->gfx.config.gb_addr_config,
1419 					GB_ADDR_CONFIG,
1420 					PIPE_INTERLEAVE_SIZE));
1421 
1422 	return 0;
1423 }
1424 
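/*
 * Allocate one NGG buffer in VRAM. size_se gives the per-shader-engine
 * size (0 selects default_size_se); the total allocation is scaled by
 * the number of shader engines.
 */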
1425 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1426 				   struct amdgpu_ngg_buf *ngg_buf,
1427 				   int size_se,
1428 				   int default_size_se)
1429 {
1430 	int r;
1431 
1432 	if (size_se < 0) {
1433 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1434 		return -EINVAL;
1435 	}
1436 	size_se = size_se ? size_se : default_size_se;
1437 
1438 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1439 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1440 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1441 				    &ngg_buf->bo,
1442 				    &ngg_buf->gpu_addr,
1443 				    NULL);
1444 	if (r) {
1445 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1446 		return r;
1447 	}
1448 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1449 
1450 	return r;
1451 }
1452 
1453 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1454 {
1455 	int i;
1456 
1457 	for (i = 0; i < NGG_BUF_MAX; i++)
1458 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1459 				      &adev->gfx.ngg.buf[i].gpu_addr,
1460 				      NULL);
1461 
1462 	memset(&adev->gfx.ngg.buf[0], 0,
1463 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1464 
1465 	adev->gfx.ngg.init = false;
1466 
1467 	return 0;
1468 }
1469 
1470 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1471 {
1472 	int r;
1473 
1474 	if (!amdgpu_ngg || adev->gfx.ngg.init)
1475 		return 0;
1476 
1477 	/* reserve GDS memory: 64-byte alignment */
1478 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1479 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1480 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1481 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1482 
1483 	/* Primitive Buffer */
1484 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1485 				    amdgpu_prim_buf_per_se,
1486 				    64 * 1024);
1487 	if (r) {
1488 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1489 		goto err;
1490 	}
1491 
1492 	/* Position Buffer */
1493 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1494 				    amdgpu_pos_buf_per_se,
1495 				    256 * 1024);
1496 	if (r) {
1497 		dev_err(adev->dev, "Failed to create Position Buffer\n");
1498 		goto err;
1499 	}
1500 
1501 	/* Control Sideband */
1502 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1503 				    amdgpu_cntl_sb_buf_per_se,
1504 				    256);
1505 	if (r) {
1506 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1507 		goto err;
1508 	}
1509 
1510 	/* Parameter Cache, not created by default */
1511 	if (amdgpu_param_buf_per_se <= 0)
1512 		goto out;
1513 
1514 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1515 				    amdgpu_param_buf_per_se,
1516 				    512 * 1024);
1517 	if (r) {
1518 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
1519 		goto err;
1520 	}
1521 
1522 out:
1523 	adev->gfx.ngg.init = true;
1524 	return 0;
1525 err:
1526 	gfx_v9_0_ngg_fini(adev);
1527 	return r;
1528 }
1529 
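/*
 * Enable NGG: program the WD buffer size and base registers from the
 * buffers allocated in gfx_v9_0_ngg_init() and clear the GDS region
 * reserved for NGG using a DMA_DATA packet on the gfx ring.
 */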
1530 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1531 {
1532 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1533 	int r;
1534 	u32 data, base;
1535 
1536 	if (!amdgpu_ngg)
1537 		return 0;
1538 
1539 	/* Program buffer size */
1540 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1541 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1542 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1543 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
1544 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1545 
1546 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1547 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1548 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1549 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1550 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1551 
1552 	/* Program buffer base address */
1553 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1554 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1555 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1556 
1557 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1558 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1559 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1560 
1561 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1562 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1563 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1564 
1565 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1566 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1567 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1568 
1569 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1570 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1571 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1572 
1573 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1574 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1575 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1576 
1577 	/* Clear GDS reserved memory */
1578 	r = amdgpu_ring_alloc(ring, 17);
1579 	if (r) {
1580 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1581 			  ring->name, r);
1582 		return r;
1583 	}
1584 
1585 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1586 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1587 			           (adev->gds.gds_size +
1588 				    adev->gfx.ngg.gds_reserve_size));
1589 
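	/* DMA_DATA fill: dst_sel 1 / src_sel 2 appear to select GDS as the
	 * destination and immediate data (zero) as the source */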
1590 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1591 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1592 				PACKET3_DMA_DATA_DST_SEL(1) |
1593 				PACKET3_DMA_DATA_SRC_SEL(2)));
1594 	amdgpu_ring_write(ring, 0);
1595 	amdgpu_ring_write(ring, 0);
1596 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1597 	amdgpu_ring_write(ring, 0);
1598 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1599 				adev->gfx.ngg.gds_reserve_size);
1600 
1601 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1602 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1603 
1604 	amdgpu_ring_commit(ring);
1605 
1606 	return 0;
1607 }
1608 
1609 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1610 				      int mec, int pipe, int queue)
1611 {
1612 	int r;
1613 	unsigned irq_type;
1614 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1615 
1616 	ring = &adev->gfx.compute_ring[ring_id];
1619 	ring->me = mec + 1;
1620 	ring->pipe = pipe;
1621 	ring->queue = queue;
1622 
1623 	ring->ring_obj = NULL;
1624 	ring->use_doorbell = true;
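	/* doorbell_index values are 64-bit slots; << 1 converts to the 32-bit dword index used by the CP */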
1625 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1626 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1627 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1628 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1629 
1630 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1631 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1632 		+ ring->pipe;
1633 
1634 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1635 	r = amdgpu_ring_init(adev, ring, 1024,
1636 			     &adev->gfx.eop_irq, irq_type);
1637 	if (r)
1638 		return r;
1639 
1641 	return 0;
1642 }
1643 
1644 static int gfx_v9_0_sw_init(void *handle)
1645 {
1646 	int i, j, k, r, ring_id;
1647 	struct amdgpu_ring *ring;
1648 	struct amdgpu_kiq *kiq;
1649 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1650 
1651 	switch (adev->asic_type) {
1652 	case CHIP_VEGA10:
1653 	case CHIP_VEGA12:
1654 	case CHIP_VEGA20:
1655 	case CHIP_RAVEN:
1656 		adev->gfx.mec.num_mec = 2;
1657 		break;
1658 	default:
1659 		adev->gfx.mec.num_mec = 1;
1660 		break;
1661 	}
1662 
1663 	adev->gfx.mec.num_pipe_per_mec = 4;
1664 	adev->gfx.mec.num_queue_per_pipe = 8;
1665 
1666 	/* EOP Event */
1667 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1668 	if (r)
1669 		return r;
1670 
1671 	/* Privileged reg */
1672 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1673 			      &adev->gfx.priv_reg_irq);
1674 	if (r)
1675 		return r;
1676 
1677 	/* Privileged inst */
1678 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1679 			      &adev->gfx.priv_inst_irq);
1680 	if (r)
1681 		return r;
1682 
1683 	/* ECC error */
1684 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1685 			      &adev->gfx.cp_ecc_error_irq);
1686 	if (r)
1687 		return r;
1688 
1689 	/* FUE error */
1690 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1691 			      &adev->gfx.cp_ecc_error_irq);
1692 	if (r)
1693 		return r;
1694 
1695 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1696 
1697 	gfx_v9_0_scratch_init(adev);
1698 
1699 	r = gfx_v9_0_init_microcode(adev);
1700 	if (r) {
1701 		DRM_ERROR("Failed to load gfx firmware!\n");
1702 		return r;
1703 	}
1704 
1705 	r = adev->gfx.rlc.funcs->init(adev);
1706 	if (r) {
1707 		DRM_ERROR("Failed to init rlc BOs!\n");
1708 		return r;
1709 	}
1710 
1711 	r = gfx_v9_0_mec_init(adev);
1712 	if (r) {
1713 		DRM_ERROR("Failed to init MEC BOs!\n");
1714 		return r;
1715 	}
1716 
1717 	/* set up the gfx ring */
1718 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1719 		ring = &adev->gfx.gfx_ring[i];
1720 		ring->ring_obj = NULL;
1721 		if (!i)
1722 			sprintf(ring->name, "gfx");
1723 		else
1724 			sprintf(ring->name, "gfx_%d", i);
1725 		ring->use_doorbell = true;
1726 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1727 		r = amdgpu_ring_init(adev, ring, 1024,
1728 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1729 		if (r)
1730 			return r;
1731 	}
1732 
1733 	/* set up the compute queues - allocate horizontally across pipes */
1734 	ring_id = 0;
1735 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1736 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1737 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1738 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1739 					continue;
1740 
1741 				r = gfx_v9_0_compute_ring_init(adev,
1742 							       ring_id,
1743 							       i, k, j);
1744 				if (r)
1745 					return r;
1746 
1747 				ring_id++;
1748 			}
1749 		}
1750 	}
1751 
1752 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1753 	if (r) {
1754 		DRM_ERROR("Failed to init KIQ BOs!\n");
1755 		return r;
1756 	}
1757 
1758 	kiq = &adev->gfx.kiq;
1759 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1760 	if (r)
1761 		return r;
1762 
1763 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
1764 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1765 	if (r)
1766 		return r;
1767 
1768 	adev->gfx.ce_ram_size = 0x8000;
1769 
1770 	r = gfx_v9_0_gpu_early_init(adev);
1771 	if (r)
1772 		return r;
1773 
1774 	r = gfx_v9_0_ngg_init(adev);
1775 	if (r)
1776 		return r;
1777 
1778 	return 0;
1779 }
1780 
1782 static int gfx_v9_0_sw_fini(void *handle)
1783 {
1784 	int i;
1785 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1786 
1787 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1788 			adev->gfx.ras_if) {
1789 		struct ras_common_if *ras_if = adev->gfx.ras_if;
1790 		struct ras_ih_if ih_info = {
1791 			.head = *ras_if,
1792 		};
1793 
1794 		amdgpu_ras_debugfs_remove(adev, ras_if);
1795 		amdgpu_ras_sysfs_remove(adev, ras_if);
1796 		amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
1797 		amdgpu_ras_feature_enable(adev, ras_if, 0);
1798 		kfree(ras_if);
1799 	}
1800 
1801 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1802 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1803 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1804 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1805 
1806 	amdgpu_gfx_mqd_sw_fini(adev);
1807 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1808 	amdgpu_gfx_kiq_fini(adev);
1809 
1810 	gfx_v9_0_mec_fini(adev);
1811 	gfx_v9_0_ngg_fini(adev);
1812 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1813 	if (adev->asic_type == CHIP_RAVEN) {
1814 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1815 				&adev->gfx.rlc.cp_table_gpu_addr,
1816 				(void **)&adev->gfx.rlc.cp_table_ptr);
1817 	}
1818 	gfx_v9_0_free_microcode(adev);
1819 
1820 	return 0;
1821 }
1822 
1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1825 {
1826 	/* TODO */
1827 }
1828 
1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1830 {
1831 	u32 data;
1832 
1833 	if (instance == 0xffffffff)
1834 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1835 	else
1836 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1837 
1838 	if (se_num == 0xffffffff)
1839 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1840 	else
1841 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1842 
1843 	if (sh_num == 0xffffffff)
1844 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1845 	else
1846 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1847 
1848 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1849 }
1850 
1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1852 {
1853 	u32 data, mask;
1854 
1855 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1856 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1857 
1858 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1859 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1860 
1861 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1862 					 adev->gfx.config.max_sh_per_se);
1863 
1864 	return (~data) & mask;
1865 }
1866 
1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1868 {
1869 	int i, j;
1870 	u32 data;
1871 	u32 active_rbs = 0;
1872 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1873 					adev->gfx.config.max_sh_per_se;
1874 
1875 	mutex_lock(&adev->grbm_idx_mutex);
1876 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1877 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1878 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1879 			data = gfx_v9_0_get_rb_active_bitmap(adev);
1880 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1881 					       rb_bitmap_width_per_sh);
1882 		}
1883 	}
1884 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1885 	mutex_unlock(&adev->grbm_idx_mutex);
1886 
1887 	adev->gfx.config.backend_enable_mask = active_rbs;
1888 	adev->gfx.config.num_rbs = hweight32(active_rbs);
1889 }
1890 
1891 #define DEFAULT_SH_MEM_BASES	(0x6000)
1892 #define FIRST_COMPUTE_VMID	(8)
1893 #define LAST_COMPUTE_VMID	(16)
1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1895 {
1896 	int i;
1897 	uint32_t sh_mem_config;
1898 	uint32_t sh_mem_bases;
1899 
1900 	/*
1901 	 * Configure apertures:
1902 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1903 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1904 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1905 	 */
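	/* SH_MEM_BASES: private base in bits 15:0, shared base in bits 31:16 */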
1906 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1907 
1908 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1909 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1910 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1911 
1912 	mutex_lock(&adev->srbm_mutex);
1913 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1914 		soc15_grbm_select(adev, 0, 0, 0, i);
1915 		/* CP and shaders */
1916 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1917 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1918 	}
1919 	soc15_grbm_select(adev, 0, 0, 0, 0);
1920 	mutex_unlock(&adev->srbm_mutex);
1921 }
1922 
1923 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1924 {
1925 	u32 tmp;
1926 	int i;
1927 
1928 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1929 
1930 	gfx_v9_0_tiling_mode_table_init(adev);
1931 
1932 	gfx_v9_0_setup_rb(adev);
1933 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1934 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1935 
1936 	/* XXX SH_MEM regs */
1937 	/* where to put LDS, scratch, GPUVM in FSA64 space */
1938 	mutex_lock(&adev->srbm_mutex);
1939 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1940 		soc15_grbm_select(adev, 0, 0, 0, i);
1941 		/* CP and shaders */
1942 		if (i == 0) {
1943 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1944 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1945 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1946 					    !!amdgpu_noretry);
1947 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1948 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1949 		} else {
1950 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1951 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1952 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1953 					    !!amdgpu_noretry);
1954 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1955 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1956 				(adev->gmc.private_aperture_start >> 48));
1957 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1958 				(adev->gmc.shared_aperture_start >> 48));
1959 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1960 		}
1961 	}
1962 	soc15_grbm_select(adev, 0, 0, 0, 0);
1963 
1964 	mutex_unlock(&adev->srbm_mutex);
1965 
1966 	gfx_v9_0_init_compute_vmid(adev);
1967 }
1968 
1969 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1970 {
1971 	u32 i, j, k;
1972 	u32 mask;
1973 
1974 	mutex_lock(&adev->grbm_idx_mutex);
1975 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1976 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1977 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1978 			for (k = 0; k < adev->usec_timeout; k++) {
1979 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1980 					break;
1981 				udelay(1);
1982 			}
1983 			if (k == adev->usec_timeout) {
1984 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
1985 						      0xffffffff, 0xffffffff);
1986 				mutex_unlock(&adev->grbm_idx_mutex);
1987 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1988 					 i, j);
1989 				return;
1990 			}
1991 		}
1992 	}
1993 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1994 	mutex_unlock(&adev->grbm_idx_mutex);
1995 
1996 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
1997 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
1998 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
1999 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2000 	for (k = 0; k < adev->usec_timeout; k++) {
2001 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2002 			break;
2003 		udelay(1);
2004 	}
2005 }
2006 
2007 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2008 					       bool enable)
2009 {
2010 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2011 
2012 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2013 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2014 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2015 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2016 
2017 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2018 }
2019 
2020 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2021 {
2022 	/* csib */
2023 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2024 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2025 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2026 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2027 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2028 			adev->gfx.rlc.clear_state_size);
2029 }
2030 
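/*
 * Walk the RLC register list format blob starting at indirect_offset:
 * record where each indirect block starts and collect the unique
 * indirect register offsets referenced by the list.
 */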
2031 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2032 				int indirect_offset,
2033 				int list_size,
2034 				int *unique_indirect_regs,
2035 				int unique_indirect_reg_count,
2036 				int *indirect_start_offsets,
2037 				int *indirect_start_offsets_count,
2038 				int max_start_offsets_count)
2039 {
2040 	int idx;
2041 
2042 	for (; indirect_offset < list_size; indirect_offset++) {
2043 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2044 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2045 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2046 
2047 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2048 			indirect_offset += 2;
2049 
2050 			/* look for the matching index */
2051 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2052 				if (unique_indirect_regs[idx] ==
2053 					register_list_format[indirect_offset] ||
2054 					!unique_indirect_regs[idx])
2055 					break;
2056 			}
2057 
2058 			BUG_ON(idx >= unique_indirect_reg_count);
2059 
2060 			if (!unique_indirect_regs[idx])
2061 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2062 
2063 			indirect_offset++;
2064 		}
2065 	}
2066 }
2067 
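/*
 * Program the RLC save/restore machine: copy the register restore table
 * into SRM ARAM, load the direct and indirect register lists into GPM
 * scratch RAM, then write the list size, the block start offsets and the
 * unique indirect register addresses.
 */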
2068 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2069 {
2070 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2071 	int unique_indirect_reg_count = 0;
2072 
2073 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2074 	int indirect_start_offsets_count = 0;
2075 
2076 	int list_size = 0;
2077 	int i = 0, j = 0;
2078 	u32 tmp = 0;
2079 
2080 	u32 *register_list_format =
2081 		kmemdup(adev->gfx.rlc.register_list_format,
2082 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2083 	if (!register_list_format)
2084 		return -ENOMEM;
2085 
2086 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2087 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2088 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2089 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2090 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2091 				    unique_indirect_regs,
2092 				    unique_indirect_reg_count,
2093 				    indirect_start_offsets,
2094 				    &indirect_start_offsets_count,
2095 				    ARRAY_SIZE(indirect_start_offsets));
2096 
2097 	/* enable auto inc in case it is disabled */
2098 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2099 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2100 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2101 
2102 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2103 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2104 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2105 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2106 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2107 			adev->gfx.rlc.register_restore[i]);
2108 
2109 	/* load indirect register */
2110 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2111 		adev->gfx.rlc.reg_list_format_start);
2112 
2113 	/* direct register portion */
2114 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2115 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2116 			register_list_format[i]);
2117 
2118 	/* indirect register portion */
2119 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2120 		if (register_list_format[i] == 0xFFFFFFFF) {
2121 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2122 			continue;
2123 		}
2124 
2125 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2126 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2127 
2128 		for (j = 0; j < unique_indirect_reg_count; j++) {
2129 			if (register_list_format[i] == unique_indirect_regs[j]) {
2130 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2131 				break;
2132 			}
2133 		}
2134 
2135 		BUG_ON(j >= unique_indirect_reg_count);
2136 
2137 		i++;
2138 	}
2139 
2140 	/* set save/restore list size */
2141 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2142 	list_size = list_size >> 1;
2143 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2144 		adev->gfx.rlc.reg_restore_list_size);
2145 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2146 
2147 	/* write the starting offsets to RLC scratch ram */
2148 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2149 		adev->gfx.rlc.starting_offsets_start);
2150 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2151 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2152 		       indirect_start_offsets[i]);
2153 
2154 	/* load unique indirect regs */
2155 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2156 		if (unique_indirect_regs[i] != 0) {
2157 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2158 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2159 			       unique_indirect_regs[i] & 0x3FFFF);
2160 
2161 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2162 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2163 			       unique_indirect_regs[i] >> 20);
2164 		}
2165 	}
2166 
2167 	kfree(register_list_format);
2168 	return 0;
2169 }
2170 
2171 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2172 {
2173 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2174 }
2175 
2176 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2177 					     bool enable)
2178 {
2179 	uint32_t data = 0;
2180 	uint32_t default_data = 0;
2181 
2182 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2183 	if (enable) {
2184 		/* enable GFXIP control over CGPG */
2185 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2186 		if (default_data != data)
2187 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2188 
2189 		/* update status */
2190 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2191 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2192 		if (default_data != data)
2193 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2194 	} else {
2195 		/* restore GFXIP control over CGPG */
2196 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2197 		if (default_data != data)
2198 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2199 	}
2200 }
2201 
2202 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2203 {
2204 	uint32_t data = 0;
2205 
2206 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2207 			      AMD_PG_SUPPORT_GFX_SMG |
2208 			      AMD_PG_SUPPORT_GFX_DMG)) {
2209 		/* init IDLE_POLL_COUNT = 60 */
2210 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2211 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2212 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2213 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2214 
2215 		/* init RLC PG Delay */
2216 		data = 0;
2217 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2218 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2219 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2220 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2221 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2222 
2223 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2224 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2225 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2226 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2227 
2228 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2229 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2230 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2231 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2232 
2233 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2234 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2235 
2236 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2237 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2238 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2239 
2240 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2241 	}
2242 }
2243 
2244 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2245 						bool enable)
2246 {
2247 	uint32_t data = 0;
2248 	uint32_t default_data = 0;
2249 
2250 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2251 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2252 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2253 			     enable ? 1 : 0);
2254 	if (default_data != data)
2255 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2256 }
2257 
2258 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2259 						bool enable)
2260 {
2261 	uint32_t data = 0;
2262 	uint32_t default_data = 0;
2263 
2264 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2265 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2266 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2267 			     enable ? 1 : 0);
2268 	if (default_data != data)
2269 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2270 }
2271 
2272 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2273 					bool enable)
2274 {
2275 	uint32_t data = 0;
2276 	uint32_t default_data = 0;
2277 
2278 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2279 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2280 			     CP_PG_DISABLE,
2281 			     enable ? 0 : 1);
2282 	if (default_data != data)
2283 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2284 }
2285 
2286 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2287 						bool enable)
2288 {
2289 	uint32_t data, default_data;
2290 
2291 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2292 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2293 			     GFX_POWER_GATING_ENABLE,
2294 			     enable ? 1 : 0);
2295 	if (default_data != data)
2296 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2297 }
2298 
2299 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2300 						bool enable)
2301 {
2302 	uint32_t data, default_data;
2303 
2304 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2305 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2306 			     GFX_PIPELINE_PG_ENABLE,
2307 			     enable ? 1 : 0);
2308 	if (default_data != data)
2309 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2310 
2311 	if (!enable)
2312 		/* read any GFX register to wake up GFX */
2313 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2314 }
2315 
2316 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2317 						       bool enable)
2318 {
2319 	uint32_t data, default_data;
2320 
2321 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2322 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2323 			     STATIC_PER_CU_PG_ENABLE,
2324 			     enable ? 1 : 0);
2325 	if (default_data != data)
2326 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2327 }
2328 
2329 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2330 						bool enable)
2331 {
2332 	uint32_t data, default_data;
2333 
2334 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2335 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2336 			     DYN_PER_CU_PG_ENABLE,
2337 			     enable ? 1 : 0);
2338 	if (default_data != data)
2339 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2340 }
2341 
2342 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2343 {
2344 	gfx_v9_0_init_csb(adev);
2345 
2346 	/*
2347 	 * The RLC save/restore list is only supported from RLC v2_1 onwards,
2348 	 * and it is required by the gfxoff feature.
2349 	 */
2350 	if (adev->gfx.rlc.is_rlc_v2_1) {
2351 		gfx_v9_1_init_rlc_save_restore_list(adev);
2352 		gfx_v9_0_enable_save_restore_machine(adev);
2353 	}
2354 
2355 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2356 			      AMD_PG_SUPPORT_GFX_SMG |
2357 			      AMD_PG_SUPPORT_GFX_DMG |
2358 			      AMD_PG_SUPPORT_CP |
2359 			      AMD_PG_SUPPORT_GDS |
2360 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2361 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2362 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2363 		gfx_v9_0_init_gfx_power_gating(adev);
2364 	}
2365 }
2366 
2367 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2368 {
2369 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2370 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2371 	gfx_v9_0_wait_for_rlc_serdes(adev);
2372 }
2373 
2374 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2375 {
2376 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2377 	udelay(50);
2378 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2379 	udelay(50);
2380 }
2381 
2382 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2383 {
2384 #ifdef AMDGPU_RLC_DEBUG_RETRY
2385 	u32 rlc_ucode_ver;
2386 #endif
2387 
2388 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2389 	udelay(50);
2390 
2391 	/* on APUs such as carrizo, the cp interrupt is enabled after cp init */
2392 	if (!(adev->flags & AMD_IS_APU)) {
2393 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2394 		udelay(50);
2395 	}
2396 
2397 #ifdef AMDGPU_RLC_DEBUG_RETRY
2398 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2399 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2400 	if(rlc_ucode_ver == 0x108) {
2401 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2402 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2403 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2404 		 * default is 0x9C4 to create a 100us interval */
2405 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2406 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2407 		 * to disable the page fault retry interrupts, default is
2408 		 * 0x100 (256) */
2409 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2410 	}
2411 #endif
2412 }
2413 
2414 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2415 {
2416 	const struct rlc_firmware_header_v2_0 *hdr;
2417 	const __le32 *fw_data;
2418 	unsigned i, fw_size;
2419 
2420 	if (!adev->gfx.rlc_fw)
2421 		return -EINVAL;
2422 
2423 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2424 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2425 
2426 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2427 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2428 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2429 
2430 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2431 			RLCG_UCODE_LOADING_START_ADDRESS);
2432 	for (i = 0; i < fw_size; i++)
2433 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2434 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2435 
2436 	return 0;
2437 }
2438 
2439 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2440 {
2441 	int r;
2442 
2443 	if (amdgpu_sriov_vf(adev)) {
2444 		gfx_v9_0_init_csb(adev);
2445 		return 0;
2446 	}
2447 
2448 	adev->gfx.rlc.funcs->stop(adev);
2449 
2450 	/* disable CG */
2451 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2452 
2453 	gfx_v9_0_init_pg(adev);
2454 
2455 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2456 		/* legacy rlc firmware loading */
2457 		r = gfx_v9_0_rlc_load_microcode(adev);
2458 		if (r)
2459 			return r;
2460 	}
2461 
2462 	switch (adev->asic_type) {
2463 	case CHIP_RAVEN:
2464 		if (amdgpu_lbpw == 0)
2465 			gfx_v9_0_enable_lbpw(adev, false);
2466 		else
2467 			gfx_v9_0_enable_lbpw(adev, true);
2468 		break;
2469 	case CHIP_VEGA20:
2470 		if (amdgpu_lbpw > 0)
2471 			gfx_v9_0_enable_lbpw(adev, true);
2472 		else
2473 			gfx_v9_0_enable_lbpw(adev, false);
2474 		break;
2475 	default:
2476 		break;
2477 	}
2478 
2479 	adev->gfx.rlc.funcs->start(adev);
2480 
2481 	return 0;
2482 }
2483 
2484 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2485 {
2486 	int i;
2487 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2488 
2489 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2490 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2491 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2492 	if (!enable) {
2493 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2494 			adev->gfx.gfx_ring[i].sched.ready = false;
2495 	}
2496 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2497 	udelay(50);
2498 }
2499 
2500 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2501 {
2502 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2503 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2504 	const struct gfx_firmware_header_v1_0 *me_hdr;
2505 	const __le32 *fw_data;
2506 	unsigned i, fw_size;
2507 
2508 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2509 		return -EINVAL;
2510 
2511 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2512 		adev->gfx.pfp_fw->data;
2513 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2514 		adev->gfx.ce_fw->data;
2515 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2516 		adev->gfx.me_fw->data;
2517 
2518 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2519 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2520 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2521 
2522 	gfx_v9_0_cp_gfx_enable(adev, false);
2523 
2524 	/* PFP */
2525 	fw_data = (const __le32 *)
2526 		(adev->gfx.pfp_fw->data +
2527 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2528 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2529 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2530 	for (i = 0; i < fw_size; i++)
2531 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2532 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2533 
2534 	/* CE */
2535 	fw_data = (const __le32 *)
2536 		(adev->gfx.ce_fw->data +
2537 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2538 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2539 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2540 	for (i = 0; i < fw_size; i++)
2541 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2542 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2543 
2544 	/* ME */
2545 	fw_data = (const __le32 *)
2546 		(adev->gfx.me_fw->data +
2547 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2548 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2549 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2550 	for (i = 0; i < fw_size; i++)
2551 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2552 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2553 
2554 	return 0;
2555 }
2556 
2557 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2558 {
2559 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2560 	const struct cs_section_def *sect = NULL;
2561 	const struct cs_extent_def *ext = NULL;
2562 	int r, i, tmp;
2563 
2564 	/* init the CP */
2565 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2566 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2567 
2568 	gfx_v9_0_cp_gfx_enable(adev, true);
2569 
2570 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2571 	if (r) {
2572 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2573 		return r;
2574 	}
2575 
2576 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2577 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2578 
2579 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2580 	amdgpu_ring_write(ring, 0x80000000);
2581 	amdgpu_ring_write(ring, 0x80000000);
2582 
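	/* emit the clear-state context register defaults from gfx9_cs_data */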
2583 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2584 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2585 			if (sect->id == SECT_CONTEXT) {
2586 				amdgpu_ring_write(ring,
2587 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2588 					       ext->reg_count));
2589 				amdgpu_ring_write(ring,
2590 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2591 				for (i = 0; i < ext->reg_count; i++)
2592 					amdgpu_ring_write(ring, ext->extent[i]);
2593 			}
2594 		}
2595 	}
2596 
2597 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2598 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2599 
2600 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2601 	amdgpu_ring_write(ring, 0);
2602 
2603 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2604 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2605 	amdgpu_ring_write(ring, 0x8000);
2606 	amdgpu_ring_write(ring, 0x8000);
2607 
2608 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2609 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2610 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2611 	amdgpu_ring_write(ring, tmp);
2612 	amdgpu_ring_write(ring, 0);
2613 
2614 	amdgpu_ring_commit(ring);
2615 
2616 	return 0;
2617 }
2618 
2619 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2620 {
2621 	struct amdgpu_ring *ring;
2622 	u32 tmp;
2623 	u32 rb_bufsz;
2624 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2625 
2626 	/* Set the write pointer delay */
2627 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2628 
2629 	/* set the RB to use vmid 0 */
2630 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2631 
2632 	/* Set ring buffer size */
2633 	ring = &adev->gfx.gfx_ring[0];
2634 	rb_bufsz = order_base_2(ring->ring_size / 8);
2635 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2636 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2637 #ifdef __BIG_ENDIAN
2638 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2639 #endif
2640 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2641 
2642 	/* Initialize the ring buffer's write pointers */
2643 	ring->wptr = 0;
2644 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2645 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2646 
2647 	/* set the wb address whether it's enabled or not */
2648 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2649 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2650 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2651 
2652 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2653 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2654 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2655 
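	/* brief delay, then re-write CP_RB0_CNTL now that the rptr/wptr addresses are programmed */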
2656 	mdelay(1);
2657 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2658 
2659 	rb_addr = ring->gpu_addr >> 8;
2660 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2661 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2662 
2663 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2664 	if (ring->use_doorbell) {
2665 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2666 				    DOORBELL_OFFSET, ring->doorbell_index);
2667 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2668 				    DOORBELL_EN, 1);
2669 	} else {
2670 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2671 	}
2672 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2673 
2674 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2675 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
2676 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2677 
2678 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2679 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2680 
2682 	/* start the ring */
2683 	gfx_v9_0_cp_gfx_start(adev);
2684 	ring->sched.ready = true;
2685 
2686 	return 0;
2687 }
2688 
2689 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2690 {
2691 	int i;
2692 
2693 	if (enable) {
2694 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2695 	} else {
2696 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2697 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2698 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
2699 			adev->gfx.compute_ring[i].sched.ready = false;
2700 		adev->gfx.kiq.ring.sched.ready = false;
2701 	}
2702 	udelay(50);
2703 }
2704 
2705 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2706 {
2707 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2708 	const __le32 *fw_data;
2709 	unsigned i;
2710 	u32 tmp;
2711 
2712 	if (!adev->gfx.mec_fw)
2713 		return -EINVAL;
2714 
2715 	gfx_v9_0_cp_compute_enable(adev, false);
2716 
2717 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2718 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2719 
2720 	fw_data = (const __le32 *)
2721 		(adev->gfx.mec_fw->data +
2722 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2723 	tmp = 0;
2724 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2725 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2726 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2727 
2728 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2729 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2730 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2731 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2732 
2733 	/* MEC1 */
2734 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2735 			 mec_hdr->jt_offset);
2736 	for (i = 0; i < mec_hdr->jt_size; i++)
2737 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2738 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2739 
2740 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2741 			adev->gfx.mec_fw_version);
2742 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2743 
2744 	return 0;
2745 }
2746 
2747 /* KIQ functions */
2748 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2749 {
2750 	uint32_t tmp;
2751 	struct amdgpu_device *adev = ring->adev;
2752 
2753 	/* tell the RLC which queue is the KIQ */
2754 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2755 	tmp &= 0xffffff00;
2756 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2757 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
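	/* second write with bit 7 set; presumably this marks the selected queue as the active KIQ */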
2758 	tmp |= 0x80;
2759 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2760 }
2761 
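/*
 * Map all enabled compute queues (KCQs) through the KIQ: a single
 * SET_RESOURCES packet carrying the queue mask, followed by one
 * MAP_QUEUES packet per compute ring.
 */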
2762 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2763 {
2764 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2765 	uint64_t queue_mask = 0;
2766 	int r, i;
2767 
2768 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2769 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2770 			continue;
2771 
2772 		/* This situation may be hit in the future if a new HW
2773 		 * generation exposes more than 64 queues. If so, the
2774 		 * definition of queue_mask needs updating */
2775 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2776 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2777 			break;
2778 		}
2779 
2780 		queue_mask |= (1ull << i);
2781 	}
2782 
2783 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2784 	if (r) {
2785 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2786 		return r;
2787 	}
2788 
2789 	/* set resources */
2790 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2791 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2792 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
2793 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
2794 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
2795 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
2796 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
2797 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
2798 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
2799 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2800 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2801 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2802 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2803 
2804 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2805 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
2806 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2807 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2808 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2809 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2810 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2811 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2812 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2813 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2814 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2815 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2816 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2817 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2818 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2819 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2820 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2821 	}
2822 
2823 	r = amdgpu_ring_test_helper(kiq_ring);
2824 	if (r)
2825 		DRM_ERROR("KCQ enable failed\n");
2826 
2827 	return r;
2828 }
2829 
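/*
 * Fill the memory queue descriptor (MQD) for a compute ring: EOP buffer,
 * doorbell, MQD/HQD base addresses and PQ control, so the CP can load the
 * queue state from memory.
 */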
2830 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2831 {
2832 	struct amdgpu_device *adev = ring->adev;
2833 	struct v9_mqd *mqd = ring->mqd_ptr;
2834 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2835 	uint32_t tmp;
2836 
2837 	mqd->header = 0xC0310800;
2838 	mqd->compute_pipelinestat_enable = 0x00000001;
2839 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2840 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2841 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2842 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2843 	mqd->compute_misc_reserved = 0x00000003;
2844 
2845 	mqd->dynamic_cu_mask_addr_lo =
2846 		lower_32_bits(ring->mqd_gpu_addr
2847 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2848 	mqd->dynamic_cu_mask_addr_hi =
2849 		upper_32_bits(ring->mqd_gpu_addr
2850 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2851 
2852 	eop_base_addr = ring->eop_gpu_addr >> 8;
2853 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2854 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2855 
2856 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2857 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2858 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2859 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2860 
2861 	mqd->cp_hqd_eop_control = tmp;
2862 
2863 	/* enable doorbell? */
2864 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2865 
2866 	if (ring->use_doorbell) {
2867 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2868 				    DOORBELL_OFFSET, ring->doorbell_index);
2869 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2870 				    DOORBELL_EN, 1);
2871 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2872 				    DOORBELL_SOURCE, 0);
2873 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2874 				    DOORBELL_HIT, 0);
2875 	} else {
2876 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2877 					 DOORBELL_EN, 0);
2878 	}
2879 
2880 	mqd->cp_hqd_pq_doorbell_control = tmp;
2881 
2882 	/* disable the queue if it's active */
2883 	ring->wptr = 0;
2884 	mqd->cp_hqd_dequeue_request = 0;
2885 	mqd->cp_hqd_pq_rptr = 0;
2886 	mqd->cp_hqd_pq_wptr_lo = 0;
2887 	mqd->cp_hqd_pq_wptr_hi = 0;
2888 
2889 	/* set the pointer to the MQD */
2890 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2891 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2892 
2893 	/* set MQD vmid to 0 */
2894 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2895 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2896 	mqd->cp_mqd_control = tmp;
2897 
2898 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2899 	hqd_gpu_addr = ring->gpu_addr >> 8;
2900 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2901 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2902 
2903 	/* set up the HQD, this is similar to CP_RB0_CNTL */
2904 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2905 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2906 			    (order_base_2(ring->ring_size / 4) - 1));
2907 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2908 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2909 #ifdef __BIG_ENDIAN
2910 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2911 #endif
2912 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2913 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2914 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2915 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2916 	mqd->cp_hqd_pq_control = tmp;
2917 
2918 	/* set the wb address whether it's enabled or not */
2919 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2920 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2921 	mqd->cp_hqd_pq_rptr_report_addr_hi =
2922 		upper_32_bits(wb_gpu_addr) & 0xffff;
2923 
2924 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2925 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2926 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2927 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2928 
2929 	tmp = 0;
2930 	/* enable the doorbell if requested */
2931 	if (ring->use_doorbell) {
2932 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2933 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2934 				DOORBELL_OFFSET, ring->doorbell_index);
2935 
2936 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2937 					 DOORBELL_EN, 1);
2938 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2939 					 DOORBELL_SOURCE, 0);
2940 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2941 					 DOORBELL_HIT, 0);
2942 	}
2943 
2944 	mqd->cp_hqd_pq_doorbell_control = tmp;
2945 
2946 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2947 	ring->wptr = 0;
2948 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2949 
2950 	/* set the vmid for the queue */
2951 	mqd->cp_hqd_vmid = 0;
2952 
2953 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2954 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2955 	mqd->cp_hqd_persistent_state = tmp;
2956 
2957 	/* set MIN_IB_AVAIL_SIZE */
2958 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2959 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2960 	mqd->cp_hqd_ib_control = tmp;
2961 
2962 	/* activate the queue */
2963 	mqd->cp_hqd_active = 1;
2964 
2965 	return 0;
2966 }
2967 
2968 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2969 {
2970 	struct amdgpu_device *adev = ring->adev;
2971 	struct v9_mqd *mqd = ring->mqd_ptr;
2972 	int j;
2973 
2974 	/* disable wptr polling */
2975 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2976 
2977 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2978 	       mqd->cp_hqd_eop_base_addr_lo);
2979 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2980 	       mqd->cp_hqd_eop_base_addr_hi);
2981 
2982 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2983 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2984 	       mqd->cp_hqd_eop_control);
2985 
2986 	/* enable doorbell? */
2987 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2988 	       mqd->cp_hqd_pq_doorbell_control);
2989 
2990 	/* disable the queue if it's active */
2991 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2992 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2993 		for (j = 0; j < adev->usec_timeout; j++) {
2994 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2995 				break;
2996 			udelay(1);
2997 		}
2998 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2999 		       mqd->cp_hqd_dequeue_request);
3000 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3001 		       mqd->cp_hqd_pq_rptr);
3002 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3003 		       mqd->cp_hqd_pq_wptr_lo);
3004 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3005 		       mqd->cp_hqd_pq_wptr_hi);
3006 	}
3007 
3008 	/* set the pointer to the MQD */
3009 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3010 	       mqd->cp_mqd_base_addr_lo);
3011 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3012 	       mqd->cp_mqd_base_addr_hi);
3013 
3014 	/* set MQD vmid to 0 */
3015 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3016 	       mqd->cp_mqd_control);
3017 
3018 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3019 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3020 	       mqd->cp_hqd_pq_base_lo);
3021 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3022 	       mqd->cp_hqd_pq_base_hi);
3023 
3024 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3025 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3026 	       mqd->cp_hqd_pq_control);
3027 
3028 	/* set the wb address whether it's enabled or not */
3029 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3030 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3031 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3032 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3033 
3034 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3035 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3036 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3037 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3038 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3039 
3040 	/* enable the doorbell if requested */
3041 	if (ring->use_doorbell) {
3042 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3043 					(adev->doorbell_index.kiq * 2) << 2);
3044 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3045 					(adev->doorbell_index.userqueue_end * 2) << 2);
3046 	}
3047 
3048 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3049 	       mqd->cp_hqd_pq_doorbell_control);
3050 
3051 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3052 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3053 	       mqd->cp_hqd_pq_wptr_lo);
3054 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3055 	       mqd->cp_hqd_pq_wptr_hi);
3056 
3057 	/* set the vmid for the queue */
3058 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3059 
3060 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3061 	       mqd->cp_hqd_persistent_state);
3062 
3063 	/* activate the queue */
3064 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3065 	       mqd->cp_hqd_active);
3066 
3067 	if (ring->use_doorbell)
3068 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3069 
3070 	return 0;
3071 }
3072 
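/*
 * Deactivate the KIQ HQD: request a dequeue, poll CP_HQD_ACTIVE until the
 * queue goes idle (or the timeout expires), then clear the remaining HQD
 * registers so the queue is left in a clean, inactive state.
 */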
3073 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3074 {
3075 	struct amdgpu_device *adev = ring->adev;
3076 	int j;
3077 
3078 	/* disable the queue if it's active */
3079 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3080 
3081 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3082 
3083 		for (j = 0; j < adev->usec_timeout; j++) {
3084 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3085 				break;
3086 			udelay(1);
3087 		}
3088 
3089 		if (j == adev->usec_timeout) {
3090 			DRM_DEBUG("KIQ dequeue request failed.\n");
3091 
3092 			/* manually disable the queue if the dequeue request times out */
3093 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3094 		}
3095 
3096 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3097 		      0);
3098 	}
3099 
3100 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3101 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3102 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3103 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3104 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3105 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3106 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3107 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3108 
3109 	return 0;
3110 }
3111 
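/*
 * Initialize the KIQ MQD and program its HQD registers. On a GPU reset the
 * MQD is restored from the backup copy instead of being rebuilt from scratch.
 */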
3112 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3113 {
3114 	struct amdgpu_device *adev = ring->adev;
3115 	struct v9_mqd *mqd = ring->mqd_ptr;
3116 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3117 
3118 	gfx_v9_0_kiq_setting(ring);
3119 
3120 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3121 		/* reset MQD to a clean status */
3122 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3123 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3124 
3125 		/* reset ring buffer */
3126 		ring->wptr = 0;
3127 		amdgpu_ring_clear_ring(ring);
3128 
3129 		mutex_lock(&adev->srbm_mutex);
3130 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3131 		gfx_v9_0_kiq_init_register(ring);
3132 		soc15_grbm_select(adev, 0, 0, 0, 0);
3133 		mutex_unlock(&adev->srbm_mutex);
3134 	} else {
3135 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3136 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3137 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3138 		mutex_lock(&adev->srbm_mutex);
3139 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3140 		gfx_v9_0_mqd_init(ring);
3141 		gfx_v9_0_kiq_init_register(ring);
3142 		soc15_grbm_select(adev, 0, 0, 0, 0);
3143 		mutex_unlock(&adev->srbm_mutex);
3144 
3145 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3146 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3147 	}
3148 
3149 	return 0;
3150 }
3151 
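/*
 * Initialize a compute queue's MQD. Unlike the KIQ, the HQD registers are
 * not written here; the queue is mapped to hardware later through the KIQ.
 */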
3152 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3153 {
3154 	struct amdgpu_device *adev = ring->adev;
3155 	struct v9_mqd *mqd = ring->mqd_ptr;
3156 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3157 
3158 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3159 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3160 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3161 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3162 		mutex_lock(&adev->srbm_mutex);
3163 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3164 		gfx_v9_0_mqd_init(ring);
3165 		soc15_grbm_select(adev, 0, 0, 0, 0);
3166 		mutex_unlock(&adev->srbm_mutex);
3167 
3168 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3169 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3170 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3171 		/* reset MQD to a clean status */
3172 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3173 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3174 
3175 		/* reset ring buffer */
3176 		ring->wptr = 0;
3177 		amdgpu_ring_clear_ring(ring);
3178 	} else {
3179 		amdgpu_ring_clear_ring(ring);
3180 	}
3181 
3182 	return 0;
3183 }
3184 
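/* Map the KIQ MQD buffer, initialize the queue and mark the ring as ready. */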
3185 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3186 {
3187 	struct amdgpu_ring *ring;
3188 	int r;
3189 
3190 	ring = &adev->gfx.kiq.ring;
3191 
3192 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3193 	if (unlikely(r != 0))
3194 		return r;
3195 
	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
	if (unlikely(r != 0)) {
		amdgpu_bo_unreserve(ring->mqd_obj);
		return r;
	}
3199 
3200 	gfx_v9_0_kiq_init_queue(ring);
3201 	amdgpu_bo_kunmap(ring->mqd_obj);
3202 	ring->mqd_ptr = NULL;
3203 	amdgpu_bo_unreserve(ring->mqd_obj);
3204 	ring->sched.ready = true;
3205 	return 0;
3206 }
3207 
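/*
 * Bring up the compute queues: enable the MEC, initialize each ring's MQD,
 * then let the KIQ map all compute queues to hardware.
 */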
3208 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3209 {
3210 	struct amdgpu_ring *ring = NULL;
3211 	int r = 0, i;
3212 
3213 	gfx_v9_0_cp_compute_enable(adev, true);
3214 
3215 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3216 		ring = &adev->gfx.compute_ring[i];
3217 
3218 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3219 		if (unlikely(r != 0))
3220 			goto done;
3221 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3222 		if (!r) {
3223 			r = gfx_v9_0_kcq_init_queue(ring);
3224 			amdgpu_bo_kunmap(ring->mqd_obj);
3225 			ring->mqd_ptr = NULL;
3226 		}
3227 		amdgpu_bo_unreserve(ring->mqd_obj);
3228 		if (r)
3229 			goto done;
3230 	}
3231 
3232 	r = gfx_v9_0_kiq_kcq_enable(adev);
3233 done:
3234 	return r;
3235 }
3236 
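/*
 * Full CP bring-up: load the CP microcode when the PSP is not handling
 * firmware loading, resume the KIQ, GFX and compute queues, then ring-test
 * each of them.
 */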
3237 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3238 {
3239 	int r, i;
3240 	struct amdgpu_ring *ring;
3241 
3242 	if (!(adev->flags & AMD_IS_APU))
3243 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3244 
3245 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3246 		/* legacy firmware loading */
3247 		r = gfx_v9_0_cp_gfx_load_microcode(adev);
3248 		if (r)
3249 			return r;
3250 
3251 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3252 		if (r)
3253 			return r;
3254 	}
3255 
3256 	r = gfx_v9_0_kiq_resume(adev);
3257 	if (r)
3258 		return r;
3259 
3260 	r = gfx_v9_0_cp_gfx_resume(adev);
3261 	if (r)
3262 		return r;
3263 
3264 	r = gfx_v9_0_kcq_resume(adev);
3265 	if (r)
3266 		return r;
3267 
3268 	ring = &adev->gfx.gfx_ring[0];
3269 	r = amdgpu_ring_test_helper(ring);
3270 	if (r)
3271 		return r;
3272 
3273 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3274 		ring = &adev->gfx.compute_ring[i];
3275 		amdgpu_ring_test_helper(ring);
3276 	}
3277 
3278 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3279 
3280 	return 0;
3281 }
3282 
3283 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3284 {
3285 	gfx_v9_0_cp_gfx_enable(adev, enable);
3286 	gfx_v9_0_cp_compute_enable(adev, enable);
3287 }
3288 
3289 static int gfx_v9_0_hw_init(void *handle)
3290 {
3291 	int r;
3292 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3293 
3294 	gfx_v9_0_init_golden_registers(adev);
3295 
3296 	gfx_v9_0_constants_init(adev);
3297 
3298 	r = gfx_v9_0_csb_vram_pin(adev);
3299 	if (r)
3300 		return r;
3301 
3302 	r = adev->gfx.rlc.funcs->resume(adev);
3303 	if (r)
3304 		return r;
3305 
3306 	r = gfx_v9_0_cp_resume(adev);
3307 	if (r)
3308 		return r;
3309 
3310 	r = gfx_v9_0_ngg_en(adev);
3311 	if (r)
3312 		return r;
3313 
3314 	return r;
3315 }
3316 
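/*
 * Ask the KIQ to unmap (reset) every compute queue by emitting one
 * UNMAP_QUEUES packet per ring, then ring-test the KIQ to confirm the
 * packets were consumed.
 */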
3317 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3318 {
3319 	int r, i;
3320 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3321 
3322 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3323 	if (r)
3324 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3325 
3326 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3327 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3328 
3329 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3330 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3331 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3332 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3333 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3334 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3335 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3336 		amdgpu_ring_write(kiq_ring, 0);
3337 		amdgpu_ring_write(kiq_ring, 0);
3338 		amdgpu_ring_write(kiq_ring, 0);
3339 	}
3340 	r = amdgpu_ring_test_helper(kiq_ring);
3341 	if (r)
3342 		DRM_ERROR("KCQ disable failed\n");
3343 
3344 	return r;
3345 }
3346 
3347 static int gfx_v9_0_hw_fini(void *handle)
3348 {
3349 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3350 
3351 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3352 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3353 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3354 
3355 	/* disable KCQ to avoid CPC touching memory that is no longer valid */
3356 	gfx_v9_0_kcq_disable(adev);
3357 
3358 	if (amdgpu_sriov_vf(adev)) {
3359 		gfx_v9_0_cp_gfx_enable(adev, false);
3360 		/* must disable polling for SRIOV once hw is finished, otherwise
3361 		 * the CPC engine may keep fetching the WB address, which is no
3362 		 * longer valid after sw has finished, and trigger a DMAR read
3363 		 * error on the hypervisor side.
3364 		 */
3365 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3366 		return 0;
3367 	}
3368 
3369 	/* Use the deinitialize sequence from CAIL when unbinding the device
3370 	 * from the driver, otherwise the KIQ hangs when it is bound back.
3371 	 */
3372 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3373 		mutex_lock(&adev->srbm_mutex);
3374 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3375 				adev->gfx.kiq.ring.pipe,
3376 				adev->gfx.kiq.ring.queue, 0);
3377 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3378 		soc15_grbm_select(adev, 0, 0, 0, 0);
3379 		mutex_unlock(&adev->srbm_mutex);
3380 	}
3381 
3382 	gfx_v9_0_cp_enable(adev, false);
3383 	adev->gfx.rlc.funcs->stop(adev);
3384 
3385 	gfx_v9_0_csb_vram_unpin(adev);
3386 
3387 	return 0;
3388 }
3389 
3390 static int gfx_v9_0_suspend(void *handle)
3391 {
3392 	return gfx_v9_0_hw_fini(handle);
3393 }
3394 
3395 static int gfx_v9_0_resume(void *handle)
3396 {
3397 	return gfx_v9_0_hw_init(handle);
3398 }
3399 
3400 static bool gfx_v9_0_is_idle(void *handle)
3401 {
3402 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3403 
3404 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3405 				GRBM_STATUS, GUI_ACTIVE))
3406 		return false;
3407 	else
3408 		return true;
3409 }
3410 
3411 static int gfx_v9_0_wait_for_idle(void *handle)
3412 {
3413 	unsigned i;
3414 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3415 
3416 	for (i = 0; i < adev->usec_timeout; i++) {
3417 		if (gfx_v9_0_is_idle(handle))
3418 			return 0;
3419 		udelay(1);
3420 	}
3421 	return -ETIMEDOUT;
3422 }
3423 
3424 static int gfx_v9_0_soft_reset(void *handle)
3425 {
3426 	u32 grbm_soft_reset = 0;
3427 	u32 tmp;
3428 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3429 
3430 	/* GRBM_STATUS */
3431 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3432 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3433 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3434 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3435 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3436 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3437 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3438 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3439 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3440 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3441 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3442 	}
3443 
3444 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3445 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3446 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3447 	}
3448 
3449 	/* GRBM_STATUS2 */
3450 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3451 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3452 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3453 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3454 
3455 
3456 	if (grbm_soft_reset) {
3457 		/* stop the rlc */
3458 		adev->gfx.rlc.funcs->stop(adev);
3459 
3460 		/* Disable GFX parsing/prefetching */
3461 		gfx_v9_0_cp_gfx_enable(adev, false);
3462 
3463 		/* Disable MEC parsing/prefetching */
3464 		gfx_v9_0_cp_compute_enable(adev, false);
3465 
3466 		if (grbm_soft_reset) {
3467 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3468 			tmp |= grbm_soft_reset;
3469 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3470 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3471 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3472 
3473 			udelay(50);
3474 
3475 			tmp &= ~grbm_soft_reset;
3476 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3477 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3478 		}
3479 
3480 		/* Wait a little for things to settle down */
3481 		udelay(50);
3482 	}
3483 	return 0;
3484 }
3485 
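/*
 * Read the 64-bit RLC GPU clock counter. The capture register is written
 * first so that the LSB/MSB pair can be read back consistently.
 */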
3486 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3487 {
3488 	uint64_t clock;
3489 
3490 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3491 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3492 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3493 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3494 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3495 	return clock;
3496 }
3497 
3498 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3499 					  uint32_t vmid,
3500 					  uint32_t gds_base, uint32_t gds_size,
3501 					  uint32_t gws_base, uint32_t gws_size,
3502 					  uint32_t oa_base, uint32_t oa_size)
3503 {
3504 	struct amdgpu_device *adev = ring->adev;
3505 
3506 	/* GDS Base */
3507 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3508 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3509 				   gds_base);
3510 
3511 	/* GDS Size */
3512 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3513 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3514 				   gds_size);
3515 
3516 	/* GWS */
3517 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3518 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3519 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3520 
3521 	/* OA */
3522 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3523 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3524 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3525 }
3526 
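/*
 * Hand-assembled GFX9 compute shaders used by the GPR ECC workaround below:
 * the first initializes the VGPRs, the second the SGPRs.
 */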
3527 static const u32 vgpr_init_compute_shader[] =
3528 {
3529 	0xb07c0000, 0xbe8000ff,
3530 	0x000000f8, 0xbf110800,
3531 	0x7e000280, 0x7e020280,
3532 	0x7e040280, 0x7e060280,
3533 	0x7e080280, 0x7e0a0280,
3534 	0x7e0c0280, 0x7e0e0280,
3535 	0x80808800, 0xbe803200,
3536 	0xbf84fff5, 0xbf9c0000,
3537 	0xd28c0001, 0x0001007f,
3538 	0xd28d0001, 0x0002027e,
3539 	0x10020288, 0xb8810904,
3540 	0xb7814000, 0xd1196a01,
3541 	0x00000301, 0xbe800087,
3542 	0xbefc00c1, 0xd89c4000,
3543 	0x00020201, 0xd89cc080,
3544 	0x00040401, 0x320202ff,
3545 	0x00000800, 0x80808100,
3546 	0xbf84fff8, 0x7e020280,
3547 	0xbf810000, 0x00000000,
3548 };
3549 
3550 static const u32 sgpr_init_compute_shader[] =
3551 {
3552 	0xb07c0000, 0xbe8000ff,
3553 	0x0000005f, 0xbee50080,
3554 	0xbe812c65, 0xbe822c65,
3555 	0xbe832c65, 0xbe842c65,
3556 	0xbe852c65, 0xb77c0005,
3557 	0x80808500, 0xbf84fff8,
3558 	0xbe800080, 0xbf810000,
3559 };
3560 
3561 static const struct soc15_reg_entry vgpr_init_regs[] = {
3562    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3563    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3564    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3565    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3566    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3567    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3568    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3569    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3570    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3571    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3572 };
3573 
3574 static const struct soc15_reg_entry sgpr_init_regs[] = {
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3581    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3582    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3583    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3584    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3585 };
3586 
3587 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3588    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3589    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3590    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3591    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3592    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3593    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3594    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3595    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3596    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3597    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3598    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3599    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3600    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3601    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3602    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3603    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3604    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3605    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3606    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3607    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3608    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3609    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3610    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3611    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3612    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3613    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3614    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3615    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3616    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3617    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3618    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3619    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3620 };
3621 
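/*
 * GDS ECC workaround: point VMID0 at the whole GDS aperture and use a
 * DMA_DATA packet to write across it so GDS memory starts from a known,
 * initialized state.
 */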
3622 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3623 {
3624 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3625 	int i, r;
3626 
3627 	r = amdgpu_ring_alloc(ring, 7);
3628 	if (r) {
3629 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3630 			ring->name, r);
3631 		return r;
3632 	}
3633 
3634 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3635 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3636 
3637 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3638 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3639 				PACKET3_DMA_DATA_DST_SEL(1) |
3640 				PACKET3_DMA_DATA_SRC_SEL(2) |
3641 				PACKET3_DMA_DATA_ENGINE(0)));
3642 	amdgpu_ring_write(ring, 0);
3643 	amdgpu_ring_write(ring, 0);
3644 	amdgpu_ring_write(ring, 0);
3645 	amdgpu_ring_write(ring, 0);
3646 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3647 				adev->gds.gds_size);
3648 
3649 	amdgpu_ring_commit(ring);
3650 
3651 	for (i = 0; i < adev->usec_timeout; i++) {
3652 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3653 			break;
3654 		udelay(1);
3655 	}
3656 
3657 	if (i >= adev->usec_timeout)
3658 		r = -ETIMEDOUT;
3659 
3660 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3661 
3662 	return r;
3663 }
3664 
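/*
 * GPR ECC workaround: dispatch small compute shaders that touch every VGPR
 * and SGPR, then read back the EDC counter registers to clear them.
 */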
3665 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3666 {
3667 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3668 	struct amdgpu_ib ib;
3669 	struct dma_fence *f = NULL;
3670 	int r, i, j, k;
3671 	unsigned total_size, vgpr_offset, sgpr_offset;
3672 	u64 gpu_addr;
3673 
3674 	/* only support when RAS is enabled */
3675 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3676 		return 0;
3677 
3678 	/* bail if the compute ring is not ready */
3679 	if (!ring->sched.ready)
3680 		return 0;
3681 
3682 	total_size =
3683 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3684 	total_size +=
3685 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3686 	total_size = ALIGN(total_size, 256);
3687 	vgpr_offset = total_size;
3688 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3689 	sgpr_offset = total_size;
3690 	total_size += sizeof(sgpr_init_compute_shader);
3691 
3692 	/* allocate an indirect buffer to put the commands in */
3693 	memset(&ib, 0, sizeof(ib));
3694 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3695 	if (r) {
3696 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3697 		return r;
3698 	}
3699 
3700 	/* load the compute shaders */
3701 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3702 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3703 
3704 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3705 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3706 
3707 	/* init the ib length to 0 */
3708 	ib.length_dw = 0;
3709 
3710 	/* VGPR */
3711 	/* write the register state for the compute dispatch */
3712 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3713 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3714 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3715 								- PACKET3_SET_SH_REG_START;
3716 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3717 	}
3718 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3719 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3720 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3721 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3722 							- PACKET3_SET_SH_REG_START;
3723 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3724 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3725 
3726 	/* write dispatch packet */
3727 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3728 	ib.ptr[ib.length_dw++] = 128; /* x */
3729 	ib.ptr[ib.length_dw++] = 1; /* y */
3730 	ib.ptr[ib.length_dw++] = 1; /* z */
3731 	ib.ptr[ib.length_dw++] =
3732 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3733 
3734 	/* write CS partial flush packet */
3735 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3736 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3737 
3738 	/* SGPR */
3739 	/* write the register state for the compute dispatch */
3740 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3741 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3742 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3743 								- PACKET3_SET_SH_REG_START;
3744 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3745 	}
3746 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3747 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3748 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3749 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3750 							- PACKET3_SET_SH_REG_START;
3751 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3752 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3753 
3754 	/* write dispatch packet */
3755 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3756 	ib.ptr[ib.length_dw++] = 128; /* x */
3757 	ib.ptr[ib.length_dw++] = 1; /* y */
3758 	ib.ptr[ib.length_dw++] = 1; /* z */
3759 	ib.ptr[ib.length_dw++] =
3760 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3761 
3762 	/* write CS partial flush packet */
3763 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3764 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3765 
3766 	/* schedule the ib on the ring */
3767 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3768 	if (r) {
3769 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3770 		goto fail;
3771 	}
3772 
3773 	/* wait for the GPU to finish processing the IB */
3774 	r = dma_fence_wait(f, false);
3775 	if (r) {
3776 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3777 		goto fail;
3778 	}
3779 
3780 	/* read back registers to clear the counters */
3781 	mutex_lock(&adev->grbm_idx_mutex);
3782 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3783 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3784 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3785 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3786 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3787 			}
3788 		}
3789 	}
3790 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3791 	mutex_unlock(&adev->grbm_idx_mutex);
3792 
3793 fail:
3794 	amdgpu_ib_free(adev, &ib, NULL);
3795 	dma_fence_put(f);
3796 
3797 	return r;
3798 }
3799 
3800 static int gfx_v9_0_early_init(void *handle)
3801 {
3802 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3803 
3804 	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3805 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3806 	gfx_v9_0_set_ring_funcs(adev);
3807 	gfx_v9_0_set_irq_funcs(adev);
3808 	gfx_v9_0_set_gds_init(adev);
3809 	gfx_v9_0_set_rlc_funcs(adev);
3810 
3811 	return 0;
3812 }
3813 
3814 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3815 		struct amdgpu_iv_entry *entry);
3816 
3817 static int gfx_v9_0_ecc_late_init(void *handle)
3818 {
3819 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3820 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
3821 	struct ras_ih_if ih_info = {
3822 		.cb = gfx_v9_0_process_ras_data_cb,
3823 	};
3824 	struct ras_fs_if fs_info = {
3825 		.sysfs_name = "gfx_err_count",
3826 		.debugfs_name = "gfx_err_inject",
3827 	};
3828 	struct ras_common_if ras_block = {
3829 		.block = AMDGPU_RAS_BLOCK__GFX,
3830 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3831 		.sub_block_index = 0,
3832 		.name = "gfx",
3833 	};
3834 	int r;
3835 
3836 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3837 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3838 		return 0;
3839 	}
3840 
3841 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
3842 	if (r)
3843 		return r;
3844 
3845 	/* requires IBs so do in late init after IB pool is initialized */
3846 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3847 	if (r)
3848 		return r;
3849 
3850 	/* handle resume path. */
3851 	if (*ras_if) {
3852 		/* resend ras TA enable cmd during resume.
3853 		 * prepare to handle failure.
3854 		 */
3855 		ih_info.head = **ras_if;
3856 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3857 		if (r) {
3858 			if (r == -EAGAIN) {
3859 				/* request a gpu reset. will run again. */
3860 				amdgpu_ras_request_reset_on_boot(adev,
3861 						AMDGPU_RAS_BLOCK__GFX);
3862 				return 0;
3863 			}
3864 			/* failed to enable ras, clean up everything. */
3865 			goto irq;
3866 		}
3867 		/* enabled successfully, continue. */
3868 		goto resume;
3869 	}
3870 
3871 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3872 	if (!*ras_if)
3873 		return -ENOMEM;
3874 
3875 	**ras_if = ras_block;
3876 
3877 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3878 	if (r) {
3879 		if (r == -EAGAIN) {
3880 			amdgpu_ras_request_reset_on_boot(adev,
3881 					AMDGPU_RAS_BLOCK__GFX);
3882 			r = 0;
3883 		}
3884 		goto feature;
3885 	}
3886 
3887 	ih_info.head = **ras_if;
3888 	fs_info.head = **ras_if;
3889 
3890 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3891 	if (r)
3892 		goto interrupt;
3893 
3894 	amdgpu_ras_debugfs_create(adev, &fs_info);
3895 
3896 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
3897 	if (r)
3898 		goto sysfs;
3899 resume:
3900 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3901 	if (r)
3902 		goto irq;
3903 
3904 	return 0;
3905 irq:
3906 	amdgpu_ras_sysfs_remove(adev, *ras_if);
3907 sysfs:
3908 	amdgpu_ras_debugfs_remove(adev, *ras_if);
3909 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3910 interrupt:
3911 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
3912 feature:
3913 	kfree(*ras_if);
3914 	*ras_if = NULL;
3915 	return r;
3916 }
3917 
3918 static int gfx_v9_0_late_init(void *handle)
3919 {
3920 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3921 	int r;
3922 
3923 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3924 	if (r)
3925 		return r;
3926 
3927 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3928 	if (r)
3929 		return r;
3930 
3931 	r = gfx_v9_0_ecc_late_init(handle);
3932 	if (r)
3933 		return r;
3934 
3935 	return 0;
3936 }
3937 
3938 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3939 {
3940 	uint32_t rlc_setting;
3941 
3942 	/* report whether the RLC F32 core is currently enabled */
3943 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3944 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3945 		return false;
3946 
3947 	return true;
3948 }
3949 
3950 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3951 {
3952 	uint32_t data;
3953 	unsigned i;
3954 
3955 	data = RLC_SAFE_MODE__CMD_MASK;
3956 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3957 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3958 
3959 	/* wait for RLC_SAFE_MODE */
3960 	for (i = 0; i < adev->usec_timeout; i++) {
3961 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3962 			break;
3963 		udelay(1);
3964 	}
3965 }
3966 
3967 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3968 {
3969 	uint32_t data;
3970 
3971 	data = RLC_SAFE_MODE__CMD_MASK;
3972 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3973 }
3974 
3975 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3976 						bool enable)
3977 {
3978 	amdgpu_gfx_rlc_enter_safe_mode(adev);
3979 
3980 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3981 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3982 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3983 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3984 	} else {
3985 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3986 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3987 	}
3988 
3989 	amdgpu_gfx_rlc_exit_safe_mode(adev);
3990 }
3991 
3992 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3993 						bool enable)
3994 {
3995 	/* TODO: double check if we need to perform under safe mode */
3996 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
3997 
3998 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3999 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4000 	else
4001 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4002 
4003 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4004 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4005 	else
4006 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4007 
4008 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4009 }
4010 
4011 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4012 						      bool enable)
4013 {
4014 	uint32_t data, def;
4015 
4016 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4017 
4018 	/* It is disabled by HW by default */
4019 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4020 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4021 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4022 
4023 		if (adev->asic_type != CHIP_VEGA12)
4024 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4025 
4026 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4027 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4028 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4029 
4030 		/* only for Vega10 & Raven1 */
4031 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4032 
4033 		if (def != data)
4034 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4035 
4036 		/* MGLS is a global flag to control all MGLS in GFX */
4037 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4038 			/* 2 - RLC memory Light sleep */
4039 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4040 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4041 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4042 				if (def != data)
4043 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4044 			}
4045 			/* 3 - CP memory Light sleep */
4046 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4047 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4048 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4049 				if (def != data)
4050 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4051 			}
4052 		}
4053 	} else {
4054 		/* 1 - MGCG_OVERRIDE */
4055 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4056 
4057 		if (adev->asic_type != CHIP_VEGA12)
4058 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4059 
4060 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4061 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4062 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4063 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4064 
4065 		if (def != data)
4066 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4067 
4068 		/* 2 - disable MGLS in RLC */
4069 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4070 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4071 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4072 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4073 		}
4074 
4075 		/* 3 - disable MGLS in CP */
4076 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4077 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4078 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4079 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4080 		}
4081 	}
4082 
4083 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4084 }
4085 
4086 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4087 					   bool enable)
4088 {
4089 	uint32_t data, def;
4090 
4091 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4092 
4093 	/* Enable 3D CGCG/CGLS */
4094 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4095 		/* write cmd to clear cgcg/cgls ov */
4096 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4097 		/* unset CGCG override */
4098 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4099 		/* update CGCG and CGLS override bits */
4100 		if (def != data)
4101 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4102 
4103 		/* enable 3Dcgcg FSM(0x0000363f) */
4104 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4105 
4106 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4107 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4108 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4109 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4110 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4111 		if (def != data)
4112 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4113 
4114 		/* set IDLE_POLL_COUNT(0x00900100) */
4115 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4116 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4117 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4118 		if (def != data)
4119 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4120 	} else {
4121 		/* Disable CGCG/CGLS */
4122 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4123 		/* disable cgcg, cgls should be disabled */
4124 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4125 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4126 		/* disable cgcg and cgls in FSM */
4127 		if (def != data)
4128 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4129 	}
4130 
4131 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4132 }
4133 
4134 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4135 						      bool enable)
4136 {
4137 	uint32_t def, data;
4138 
4139 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4140 
4141 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4142 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4143 		/* unset CGCG override */
4144 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4145 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4146 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4147 		else
4148 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4149 		/* update CGCG and CGLS override bits */
4150 		if (def != data)
4151 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4152 
4153 		/* enable cgcg FSM(0x0000363F) */
4154 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4155 
4156 		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4157 			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4158 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4159 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4160 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4161 		if (def != data)
4162 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4163 
4164 		/* set IDLE_POLL_COUNT(0x00900100) */
4165 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4166 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4167 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4168 		if (def != data)
4169 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4170 	} else {
4171 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4172 		/* reset CGCG/CGLS bits */
4173 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4174 		/* disable cgcg and cgls in FSM */
4175 		if (def != data)
4176 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4177 	}
4178 
4179 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4180 }
4181 
4182 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4183 					    bool enable)
4184 {
4185 	if (enable) {
4186 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4187 		 * ===  MGCG + MGLS ===
4188 		 */
4189 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4190 		/* ===  CGCG /CGLS for GFX 3D Only === */
4191 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4192 		/* ===  CGCG + CGLS === */
4193 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4194 	} else {
4195 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4196 		 * ===  CGCG + CGLS ===
4197 		 */
4198 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4199 		/* ===  CGCG /CGLS for GFX 3D Only === */
4200 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4201 		/* ===  MGCG + MGLS === */
4202 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4203 	}
4204 	return 0;
4205 }
4206 
4207 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4208 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4209 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4210 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4211 	.init = gfx_v9_0_rlc_init,
4212 	.get_csb_size = gfx_v9_0_get_csb_size,
4213 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4214 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4215 	.resume = gfx_v9_0_rlc_resume,
4216 	.stop = gfx_v9_0_rlc_stop,
4217 	.reset = gfx_v9_0_rlc_reset,
4218 	.start = gfx_v9_0_rlc_start
4219 };
4220 
4221 static int gfx_v9_0_set_powergating_state(void *handle,
4222 					  enum amd_powergating_state state)
4223 {
4224 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4225 	bool enable = (state == AMD_PG_STATE_GATE);
4226 
4227 	switch (adev->asic_type) {
4228 	case CHIP_RAVEN:
4229 		if (!enable) {
4230 			amdgpu_gfx_off_ctrl(adev, false);
4231 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4232 		}
4233 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4234 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4235 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4236 		} else {
4237 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4238 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4239 		}
4240 
4241 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4242 			gfx_v9_0_enable_cp_power_gating(adev, true);
4243 		else
4244 			gfx_v9_0_enable_cp_power_gating(adev, false);
4245 
4246 		/* update gfx cgpg state */
4247 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4248 
4249 		/* update mgcg state */
4250 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4251 
4252 		if (enable)
4253 			amdgpu_gfx_off_ctrl(adev, true);
4254 		break;
4255 	case CHIP_VEGA12:
4256 		if (!enable) {
4257 			amdgpu_gfx_off_ctrl(adev, false);
4258 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4259 		} else {
4260 			amdgpu_gfx_off_ctrl(adev, true);
4261 		}
4262 		break;
4263 	default:
4264 		break;
4265 	}
4266 
4267 	return 0;
4268 }
4269 
4270 static int gfx_v9_0_set_clockgating_state(void *handle,
4271 					  enum amd_clockgating_state state)
4272 {
4273 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4274 
4275 	if (amdgpu_sriov_vf(adev))
4276 		return 0;
4277 
4278 	switch (adev->asic_type) {
4279 	case CHIP_VEGA10:
4280 	case CHIP_VEGA12:
4281 	case CHIP_VEGA20:
4282 	case CHIP_RAVEN:
4283 		gfx_v9_0_update_gfx_clock_gating(adev,
4284 						 state == AMD_CG_STATE_GATE);
4285 		break;
4286 	default:
4287 		break;
4288 	}
4289 	return 0;
4290 }
4291 
4292 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4293 {
4294 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4295 	int data;
4296 
4297 	if (amdgpu_sriov_vf(adev))
4298 		*flags = 0;
4299 
4300 	/* AMD_CG_SUPPORT_GFX_MGCG */
4301 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4302 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4303 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4304 
4305 	/* AMD_CG_SUPPORT_GFX_CGCG */
4306 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4307 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4308 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4309 
4310 	/* AMD_CG_SUPPORT_GFX_CGLS */
4311 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4312 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4313 
4314 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4315 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4316 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4317 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4318 
4319 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4320 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4321 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4322 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4323 
4324 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4325 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4326 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4327 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4328 
4329 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4330 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4331 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4332 }
4333 
4334 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4335 {
4336 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4337 }
4338 
4339 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4340 {
4341 	struct amdgpu_device *adev = ring->adev;
4342 	u64 wptr;
4343 
4344 	/* XXX check if swapping is necessary on BE */
4345 	if (ring->use_doorbell) {
4346 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4347 	} else {
4348 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4349 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4350 	}
4351 
4352 	return wptr;
4353 }
4354 
4355 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4356 {
4357 	struct amdgpu_device *adev = ring->adev;
4358 
4359 	if (ring->use_doorbell) {
4360 		/* XXX check if swapping is necessary on BE */
4361 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4362 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4363 	} else {
4364 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4365 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4366 	}
4367 }
4368 
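/*
 * Emit an HDP flush: select the NBIO ref/mask bits for this ring's CP engine
 * and wait on the HDP flush request/done registers.
 */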
4369 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4370 {
4371 	struct amdgpu_device *adev = ring->adev;
4372 	u32 ref_and_mask, reg_mem_engine;
4373 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4374 
4375 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4376 		switch (ring->me) {
4377 		case 1:
4378 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4379 			break;
4380 		case 2:
4381 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4382 			break;
4383 		default:
4384 			return;
4385 		}
4386 		reg_mem_engine = 0;
4387 	} else {
4388 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4389 		reg_mem_engine = 1; /* pfp */
4390 	}
4391 
4392 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4393 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4394 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4395 			      ref_and_mask, ref_and_mask, 0x20);
4396 }
4397 
4398 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4399 					struct amdgpu_job *job,
4400 					struct amdgpu_ib *ib,
4401 					uint32_t flags)
4402 {
4403 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4404 	u32 header, control = 0;
4405 
4406 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4407 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4408 	else
4409 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4410 
4411 	control |= ib->length_dw | (vmid << 24);
4412 
4413 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4414 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4415 
4416 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4417 			gfx_v9_0_ring_emit_de_meta(ring);
4418 	}
4419 
4420 	amdgpu_ring_write(ring, header);
4421 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4422 	amdgpu_ring_write(ring,
4423 #ifdef __BIG_ENDIAN
4424 		(2 << 0) |
4425 #endif
4426 		lower_32_bits(ib->gpu_addr));
4427 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4428 	amdgpu_ring_write(ring, control);
4429 }
4430 
4431 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4432 					  struct amdgpu_job *job,
4433 					  struct amdgpu_ib *ib,
4434 					  uint32_t flags)
4435 {
4436 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4437 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4438 
4439 	/* Currently, there is a high possibility of getting a wave ID mismatch
4440 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4441 	 * different wave IDs than the GDS expects. This situation happens
4442 	 * randomly when at least 5 compute pipes use GDS ordered append.
4443 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4444 	 * Those are probably bugs somewhere else in the kernel driver.
4445 	 *
4446 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4447 	 * GDS to 0 for this ring (me/pipe).
4448 	 */
4449 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4450 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4451 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4452 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4453 	}
4454 
4455 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4456 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4457 	amdgpu_ring_write(ring,
4458 #ifdef __BIG_ENDIAN
4459 				(2 << 0) |
4460 #endif
4461 				lower_32_bits(ib->gpu_addr));
4462 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4463 	amdgpu_ring_write(ring, control);
4464 }
4465 
4466 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4467 				     u64 seq, unsigned flags)
4468 {
4469 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4470 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4471 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4472 
4473 	/* RELEASE_MEM - flush caches, send int */
4474 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4475 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4476 					       EOP_TC_NC_ACTION_EN) :
4477 					      (EOP_TCL1_ACTION_EN |
4478 					       EOP_TC_ACTION_EN |
4479 					       EOP_TC_WB_ACTION_EN |
4480 					       EOP_TC_MD_ACTION_EN)) |
4481 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4482 				 EVENT_INDEX(5)));
4483 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4484 
4485 	/*
4486 	 * the address should be Qword aligned for a 64bit write, Dword
4487 	 * aligned if only the low 32 bits are sent (data high is discarded)
4488 	 */
4489 	if (write64bit)
4490 		BUG_ON(addr & 0x7);
4491 	else
4492 		BUG_ON(addr & 0x3);
4493 	amdgpu_ring_write(ring, lower_32_bits(addr));
4494 	amdgpu_ring_write(ring, upper_32_bits(addr));
4495 	amdgpu_ring_write(ring, lower_32_bits(seq));
4496 	amdgpu_ring_write(ring, upper_32_bits(seq));
4497 	amdgpu_ring_write(ring, 0);
4498 }
4499 
4500 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4501 {
4502 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4503 	uint32_t seq = ring->fence_drv.sync_seq;
4504 	uint64_t addr = ring->fence_drv.gpu_addr;
4505 
4506 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4507 			      lower_32_bits(addr), upper_32_bits(addr),
4508 			      seq, 0xffffffff, 4);
4509 }
4510 
4511 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4512 					unsigned vmid, uint64_t pd_addr)
4513 {
4514 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4515 
4516 	/* compute doesn't have PFP */
4517 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4518 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4519 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4520 		amdgpu_ring_write(ring, 0x0);
4521 	}
4522 }
4523 
4524 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4525 {
4526 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4527 }
4528 
4529 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4530 {
4531 	u64 wptr;
4532 
4533 	/* XXX check if swapping is necessary on BE */
4534 	if (ring->use_doorbell)
4535 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4536 	else
4537 		BUG();
4538 	return wptr;
4539 }
4540 
4541 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4542 					   bool acquire)
4543 {
4544 	struct amdgpu_device *adev = ring->adev;
4545 	int pipe_num, tmp, reg;
4546 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4547 
4548 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4549 
4550 	/* first me only has 2 entries, GFX and HP3D */
4551 	if (ring->me > 0)
4552 		pipe_num -= 2;
4553 
4554 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4555 	tmp = RREG32(reg);
4556 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4557 	WREG32(reg, tmp);
4558 }
4559 
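/*
 * Track high-priority pipe reservations in a shared bitmap and reduce the
 * SPI_WCL_PIPE_PERCENT value of every pipe that does not hold a reservation.
 */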
4560 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4561 					    struct amdgpu_ring *ring,
4562 					    bool acquire)
4563 {
4564 	int i, pipe;
4565 	bool reserve;
4566 	struct amdgpu_ring *iring;
4567 
4568 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4569 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4570 	if (acquire)
4571 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4572 	else
4573 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4574 
4575 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4576 		/* Clear all reservations - everyone reacquires all resources */
4577 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4578 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4579 						       true);
4580 
4581 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4582 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4583 						       true);
4584 	} else {
4585 		/* Lower all pipes without a current reservation */
4586 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4587 			iring = &adev->gfx.gfx_ring[i];
4588 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4589 							   iring->me,
4590 							   iring->pipe,
4591 							   0);
4592 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4593 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4594 		}
4595 
4596 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4597 			iring = &adev->gfx.compute_ring[i];
4598 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4599 							   iring->me,
4600 							   iring->pipe,
4601 							   0);
4602 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4603 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4604 		}
4605 	}
4606 
4607 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4608 }
4609 
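/* Select this ring's queue via SRBM and program the HQD pipe/queue
 * priority: 0x2/0xf while high priority is held, 0x0/0x0 otherwise.
 */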
4610 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4611 				      struct amdgpu_ring *ring,
4612 				      bool acquire)
4613 {
4614 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4615 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4616 
4617 	mutex_lock(&adev->srbm_mutex);
4618 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4619 
4620 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4621 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4622 
4623 	soc15_grbm_select(adev, 0, 0, 0, 0);
4624 	mutex_unlock(&adev->srbm_mutex);
4625 }
4626 
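/* Compute ring priority hook: DRM_SCHED_PRIORITY_HIGH_HW acquires the
 * high-priority HQD settings and pipe reservation, any other priority
 * releases them.
 */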
4627 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4628 					       enum drm_sched_priority priority)
4629 {
4630 	struct amdgpu_device *adev = ring->adev;
4631 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4632 
4633 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4634 		return;
4635 
4636 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4637 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4638 }
4639 
4640 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4641 {
4642 	struct amdgpu_device *adev = ring->adev;
4643 
4644 	/* XXX check if swapping is necessary on BE */
4645 	if (ring->use_doorbell) {
4646 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4647 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4648 	} else {
4649 		BUG(); /* only DOORBELL method supported on gfx9 now */
4650 	}
4651 }
4652 
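/* KIQ fence: write the 32-bit seq to the fence address with WRITE_DATA
 * and, if AMDGPU_FENCE_FLAG_INT is set, poke CPC_INT_STATUS to raise an
 * interrupt.
 */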
4653 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4654 					 u64 seq, unsigned int flags)
4655 {
4656 	struct amdgpu_device *adev = ring->adev;
4657 
4658 	/* we only allocate 32bit for each seq wb address */
4659 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4660 
4661 	/* write fence seq to the "addr" */
4662 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4663 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4664 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4665 	amdgpu_ring_write(ring, lower_32_bits(addr));
4666 	amdgpu_ring_write(ring, upper_32_bits(addr));
4667 	amdgpu_ring_write(ring, lower_32_bits(seq));
4668 
4669 	if (flags & AMDGPU_FENCE_FLAG_INT) {
4670 		/* set register to trigger INT */
4671 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4672 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4673 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4674 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4675 		amdgpu_ring_write(ring, 0);
4676 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4677 	}
4678 }
4679 
4680 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4681 {
4682 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4683 	amdgpu_ring_write(ring, 0);
4684 }
4685 
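/* Write a zeroed CE payload into the ce_payload slot of the CSA
 * (struct v9_gfx_meta_data) with a single WRITE_DATA packet.
 */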
4686 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4687 {
4688 	struct v9_ce_ib_state ce_payload = {0};
4689 	uint64_t csa_addr;
4690 	int cnt;
4691 
4692 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4693 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4694 
4695 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4696 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4697 				 WRITE_DATA_DST_SEL(8) |
4698 				 WR_CONFIRM) |
4699 				 WRITE_DATA_CACHE_POLICY(0));
4700 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4701 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4702 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4703 }
4704 
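/* Write a DE payload into the de_payload slot of the CSA; the GDS backup
 * address is placed one page (4096 bytes) past the CSA base.
 */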
4705 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4706 {
4707 	struct v9_de_ib_state de_payload = {0};
4708 	uint64_t csa_addr, gds_addr;
4709 	int cnt;
4710 
4711 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4712 	gds_addr = csa_addr + 4096;
4713 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4714 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4715 
4716 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4717 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4718 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4719 				 WRITE_DATA_DST_SEL(8) |
4720 				 WR_CONFIRM) |
4721 				 WRITE_DATA_CACHE_POLICY(0));
4722 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4723 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4724 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4725 }
4726 
4727 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4728 {
4729 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4730 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4731 }
4732 
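/* Emit CONTEXT_CONTROL.  Under SR-IOV the CE metadata is written first;
 * dw2 selects which state blocks (global/uconfig, CS SH regs, per-context
 * state and, when a preamble is present, CE RAM) get loaded.
 */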
4733 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4734 {
4735 	uint32_t dw2 = 0;
4736 
4737 	if (amdgpu_sriov_vf(ring->adev))
4738 		gfx_v9_0_ring_emit_ce_meta(ring);
4739 
4740 	gfx_v9_0_ring_emit_tmz(ring, true);
4741 
4742 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4743 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4744 		/* set load_global_config & load_global_uconfig */
4745 		dw2 |= 0x8001;
4746 		/* set load_cs_sh_regs */
4747 		dw2 |= 0x01000000;
4748 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
4749 		dw2 |= 0x10002;
4750 
4751 		/* set load_ce_ram if a preamble is present */
4752 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4753 			dw2 |= 0x10000000;
4754 	} else {
4755 		/* still load_ce_ram if this is the first time the preamble is
4756 		 * presented, even though no context switch happens.
4757 		 */
4758 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4759 			dw2 |= 0x10000000;
4760 	}
4761 
4762 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4763 	amdgpu_ring_write(ring, dw2);
4764 	amdgpu_ring_write(ring, 0);
4765 }
4766 
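/* Emit a COND_EXEC that skips the following DWs when *cond_exe_gpu_addr
 * is zero.  The DW count is not known yet, so a dummy value (0x55aa55aa)
 * is written and its ring offset returned for patch_cond_exec.
 */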
4767 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4768 {
4769 	unsigned ret;
4770 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4771 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4772 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4773 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
4774 	ret = ring->wptr & ring->buf_mask;
4775 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4776 	return ret;
4777 }
4778 
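/* Patch the DW count of the COND_EXEC emitted by init_cond_exec now that
 * the size of the conditional block is known, accounting for ring wrap.
 */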
4779 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4780 {
4781 	unsigned cur;
4782 	BUG_ON(offset > ring->buf_mask);
4783 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
4784 
4785 	cur = (ring->wptr & ring->buf_mask) - 1;
4786 	if (likely(cur > offset))
4787 		ring->ring[offset] = cur - offset;
4788 	else
4789 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4790 }
4791 
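/* KIQ register read: COPY_DATA the register into the writeback slot
 * reserved for register values (adev->virt.reg_val_offs).
 */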
4792 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4793 {
4794 	struct amdgpu_device *adev = ring->adev;
4795 
4796 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4797 	amdgpu_ring_write(ring, 0 |	/* src: register*/
4798 				(5 << 8) |	/* dst: memory */
4799 				(1 << 20));	/* write confirm */
4800 	amdgpu_ring_write(ring, reg);
4801 	amdgpu_ring_write(ring, 0);
4802 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4803 				adev->virt.reg_val_offs * 4));
4804 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4805 				adev->virt.reg_val_offs * 4));
4806 }
4807 
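/* Emit a register write via WRITE_DATA; the engine select/confirm bits
 * differ between GFX, KIQ and compute rings.
 */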
4808 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4809 				    uint32_t val)
4810 {
4811 	uint32_t cmd = 0;
4812 
4813 	switch (ring->funcs->type) {
4814 	case AMDGPU_RING_TYPE_GFX:
4815 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4816 		break;
4817 	case AMDGPU_RING_TYPE_KIQ:
4818 		cmd = (1 << 16); /* no inc addr */
4819 		break;
4820 	default:
4821 		cmd = WR_CONFIRM;
4822 		break;
4823 	}
4824 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4825 	amdgpu_ring_write(ring, cmd);
4826 	amdgpu_ring_write(ring, reg);
4827 	amdgpu_ring_write(ring, 0);
4828 	amdgpu_ring_write(ring, val);
4829 }
4830 
4831 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4832 					uint32_t val, uint32_t mask)
4833 {
4834 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4835 }
4836 
4837 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4838 						  uint32_t reg0, uint32_t reg1,
4839 						  uint32_t ref, uint32_t mask)
4840 {
4841 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4842 	struct amdgpu_device *adev = ring->adev;
4843 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4844 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4845 
4846 	if (fw_version_ok)
4847 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4848 				      ref, mask, 0x20);
4849 	else
4850 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4851 							   ref, mask);
4852 }
4853 
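/* Ring soft recovery: issue an SQ_CMD targeted at the guilty VMID
 * (wave kill) so the hang can be cleared without a full GPU reset.
 */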
4854 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4855 {
4856 	struct amdgpu_device *adev = ring->adev;
4857 	uint32_t value = 0;
4858 
4859 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4860 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4861 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4862 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4863 	WREG32(mmSQ_CMD, value);
4864 }
4865 
4866 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4867 						 enum amdgpu_interrupt_state state)
4868 {
4869 	switch (state) {
4870 	case AMDGPU_IRQ_STATE_DISABLE:
4871 	case AMDGPU_IRQ_STATE_ENABLE:
4872 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4873 			       TIME_STAMP_INT_ENABLE,
4874 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4875 		break;
4876 	default:
4877 		break;
4878 	}
4879 }
4880 
4881 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4882 						     int me, int pipe,
4883 						     enum amdgpu_interrupt_state state)
4884 {
4885 	u32 mec_int_cntl, mec_int_cntl_reg;
4886 
4887 	/*
4888 	 * amdgpu controls only the first MEC. That's why this function only
4889 	 * handles the setting of interrupts for this specific MEC. All other
4890 	 * pipes' interrupts are set by amdkfd.
4891 	 */
4892 
4893 	if (me == 1) {
4894 		switch (pipe) {
4895 		case 0:
4896 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4897 			break;
4898 		case 1:
4899 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4900 			break;
4901 		case 2:
4902 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4903 			break;
4904 		case 3:
4905 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4906 			break;
4907 		default:
4908 			DRM_DEBUG("invalid pipe %d\n", pipe);
4909 			return;
4910 		}
4911 	} else {
4912 		DRM_DEBUG("invalid me %d\n", me);
4913 		return;
4914 	}
4915 
4916 	switch (state) {
4917 	case AMDGPU_IRQ_STATE_DISABLE:
4918 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4919 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4920 					     TIME_STAMP_INT_ENABLE, 0);
4921 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4922 		break;
4923 	case AMDGPU_IRQ_STATE_ENABLE:
4924 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4925 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4926 					     TIME_STAMP_INT_ENABLE, 1);
4927 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4928 		break;
4929 	default:
4930 		break;
4931 	}
4932 }
4933 
4934 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4935 					     struct amdgpu_irq_src *source,
4936 					     unsigned type,
4937 					     enum amdgpu_interrupt_state state)
4938 {
4939 	switch (state) {
4940 	case AMDGPU_IRQ_STATE_DISABLE:
4941 	case AMDGPU_IRQ_STATE_ENABLE:
4942 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4943 			       PRIV_REG_INT_ENABLE,
4944 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4945 		break;
4946 	default:
4947 		break;
4948 	}
4949 
4950 	return 0;
4951 }
4952 
4953 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4954 					      struct amdgpu_irq_src *source,
4955 					      unsigned type,
4956 					      enum amdgpu_interrupt_state state)
4957 {
4958 	switch (state) {
4959 	case AMDGPU_IRQ_STATE_DISABLE:
4960 	case AMDGPU_IRQ_STATE_ENABLE:
4961 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4962 			       PRIV_INSTR_INT_ENABLE,
4963 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
4964 	default:
4965 		break;
4966 	}
4967 
4968 	return 0;
4969 }
4970 
4971 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
4972 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4973 			CP_ECC_ERROR_INT_ENABLE, 1)
4974 
4975 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
4976 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4977 			CP_ECC_ERROR_INT_ENABLE, 0)
4978 
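/* Enable or disable CP ECC error interrupts on the gfx ring and on all
 * four pipes of MEC1.
 */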
4979 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4980 					      struct amdgpu_irq_src *source,
4981 					      unsigned type,
4982 					      enum amdgpu_interrupt_state state)
4983 {
4984 	switch (state) {
4985 	case AMDGPU_IRQ_STATE_DISABLE:
4986 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4987 				CP_ECC_ERROR_INT_ENABLE, 0);
4988 		DISABLE_ECC_ON_ME_PIPE(1, 0);
4989 		DISABLE_ECC_ON_ME_PIPE(1, 1);
4990 		DISABLE_ECC_ON_ME_PIPE(1, 2);
4991 		DISABLE_ECC_ON_ME_PIPE(1, 3);
4992 		break;
4993 
4994 	case AMDGPU_IRQ_STATE_ENABLE:
4995 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4996 				CP_ECC_ERROR_INT_ENABLE, 1);
4997 		ENABLE_ECC_ON_ME_PIPE(1, 0);
4998 		ENABLE_ECC_ON_ME_PIPE(1, 1);
4999 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5000 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5001 		break;
5002 	default:
5003 		break;
5004 	}
5005 
5006 	return 0;
5007 }
5008 
5009 
5010 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5011 					    struct amdgpu_irq_src *src,
5012 					    unsigned type,
5013 					    enum amdgpu_interrupt_state state)
5014 {
5015 	switch (type) {
5016 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5017 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5018 		break;
5019 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5020 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5021 		break;
5022 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5023 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5024 		break;
5025 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5026 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5027 		break;
5028 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5029 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5030 		break;
5031 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5032 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5033 		break;
5034 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5035 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5036 		break;
5037 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5038 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5039 		break;
5040 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5041 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5042 		break;
5043 	default:
5044 		break;
5045 	}
5046 	return 0;
5047 }
5048 
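/* EOP interrupt handler: decode me/pipe/queue from the IV ring_id and run
 * fence processing on the matching gfx or compute ring.
 */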
5049 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5050 			    struct amdgpu_irq_src *source,
5051 			    struct amdgpu_iv_entry *entry)
5052 {
5053 	int i;
5054 	u8 me_id, pipe_id, queue_id;
5055 	struct amdgpu_ring *ring;
5056 
5057 	DRM_DEBUG("IH: CP EOP\n");
5058 	me_id = (entry->ring_id & 0x0c) >> 2;
5059 	pipe_id = (entry->ring_id & 0x03) >> 0;
5060 	queue_id = (entry->ring_id & 0x70) >> 4;
5061 
5062 	switch (me_id) {
5063 	case 0:
5064 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5065 		break;
5066 	case 1:
5067 	case 2:
5068 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5069 			ring = &adev->gfx.compute_ring[i];
5070 			/* Per-queue interrupt is supported for MEC starting from VI.
5071 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5072 			 */
5073 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5074 				amdgpu_fence_process(ring);
5075 		}
5076 		break;
5077 	}
5078 	return 0;
5079 }
5080 
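/* Route a CP fault to the scheduler of the ring identified by the IV
 * ring_id so the offending job can be handled by the DRM scheduler.
 */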
5081 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5082 			   struct amdgpu_iv_entry *entry)
5083 {
5084 	u8 me_id, pipe_id, queue_id;
5085 	struct amdgpu_ring *ring;
5086 	int i;
5087 
5088 	me_id = (entry->ring_id & 0x0c) >> 2;
5089 	pipe_id = (entry->ring_id & 0x03) >> 0;
5090 	queue_id = (entry->ring_id & 0x70) >> 4;
5091 
5092 	switch (me_id) {
5093 	case 0:
5094 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5095 		break;
5096 	case 1:
5097 	case 2:
5098 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5099 			ring = &adev->gfx.compute_ring[i];
5100 			if (ring->me == me_id && ring->pipe == pipe_id &&
5101 			    ring->queue == queue_id)
5102 				drm_sched_fault(&ring->sched);
5103 		}
5104 		break;
5105 	}
5106 }
5107 
5108 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5109 				 struct amdgpu_irq_src *source,
5110 				 struct amdgpu_iv_entry *entry)
5111 {
5112 	DRM_ERROR("Illegal register access in command stream\n");
5113 	gfx_v9_0_fault(adev, entry);
5114 	return 0;
5115 }
5116 
5117 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5118 				  struct amdgpu_irq_src *source,
5119 				  struct amdgpu_iv_entry *entry)
5120 {
5121 	DRM_ERROR("Illegal instruction in command stream\n");
5122 	gfx_v9_0_fault(adev, entry);
5123 	return 0;
5124 }
5125 
5126 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5127 		struct amdgpu_iv_entry *entry)
5128 {
5129 	/* TODO: an uncorrectable error (UE) will trigger an interrupt. */
5130 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5131 	amdgpu_ras_reset_gpu(adev, 0);
5132 	return AMDGPU_RAS_UE;
5133 }
5134 
5135 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5136 				  struct amdgpu_irq_src *source,
5137 				  struct amdgpu_iv_entry *entry)
5138 {
5139 	struct ras_common_if *ras_if = adev->gfx.ras_if;
5140 	struct ras_dispatch_if ih_data = {
5141 		.entry = entry,
5142 	};
5143 
5144 	if (!ras_if)
5145 		return 0;
5146 
5147 	ih_data.head = *ras_if;
5148 
5149 	DRM_ERROR("CP ECC ERROR IRQ\n");
5150 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5151 	return 0;
5152 }
5153 
5154 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5155 	.name = "gfx_v9_0",
5156 	.early_init = gfx_v9_0_early_init,
5157 	.late_init = gfx_v9_0_late_init,
5158 	.sw_init = gfx_v9_0_sw_init,
5159 	.sw_fini = gfx_v9_0_sw_fini,
5160 	.hw_init = gfx_v9_0_hw_init,
5161 	.hw_fini = gfx_v9_0_hw_fini,
5162 	.suspend = gfx_v9_0_suspend,
5163 	.resume = gfx_v9_0_resume,
5164 	.is_idle = gfx_v9_0_is_idle,
5165 	.wait_for_idle = gfx_v9_0_wait_for_idle,
5166 	.soft_reset = gfx_v9_0_soft_reset,
5167 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
5168 	.set_powergating_state = gfx_v9_0_set_powergating_state,
5169 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
5170 };
5171 
5172 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5173 	.type = AMDGPU_RING_TYPE_GFX,
5174 	.align_mask = 0xff,
5175 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5176 	.support_64bit_ptrs = true,
5177 	.vmhub = AMDGPU_GFXHUB,
5178 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5179 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5180 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5181 	.emit_frame_size = /* at most 242 dw in total, with 16 IBs */
5182 		5 +  /* COND_EXEC */
5183 		7 +  /* PIPELINE_SYNC */
5184 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5185 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5186 		2 + /* VM_FLUSH */
5187 		8 +  /* FENCE for VM_FLUSH */
5188 		20 + /* GDS switch */
5189 		4 + /* double SWITCH_BUFFER,
5190 		       the first COND_EXEC jumps to the place just
5191 		       prior to this double SWITCH_BUFFER */
5192 		5 + /* COND_EXEC */
5193 		7 +	 /*	HDP_flush */
5194 		4 +	 /*	VGT_flush */
5195 		14 + /*	CE_META */
5196 		31 + /*	DE_META */
5197 		3 + /* CNTX_CTRL */
5198 		5 + /* HDP_INVL */
5199 		8 + 8 + /* FENCE x2 */
5200 		2, /* SWITCH_BUFFER */
5201 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
5202 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5203 	.emit_fence = gfx_v9_0_ring_emit_fence,
5204 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5205 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5206 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5207 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5208 	.test_ring = gfx_v9_0_ring_test_ring,
5209 	.test_ib = gfx_v9_0_ring_test_ib,
5210 	.insert_nop = amdgpu_ring_insert_nop,
5211 	.pad_ib = amdgpu_ring_generic_pad_ib,
5212 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
5213 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5214 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5215 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5216 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
5217 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5218 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5219 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5220 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
5221 };
5222 
5223 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5224 	.type = AMDGPU_RING_TYPE_COMPUTE,
5225 	.align_mask = 0xff,
5226 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5227 	.support_64bit_ptrs = true,
5228 	.vmhub = AMDGPU_GFXHUB,
5229 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5230 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5231 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5232 	.emit_frame_size =
5233 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5234 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5235 		5 + /* hdp invalidate */
5236 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5237 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5238 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5239 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5240 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5241 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5242 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
5243 	.emit_fence = gfx_v9_0_ring_emit_fence,
5244 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5245 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5246 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5247 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5248 	.test_ring = gfx_v9_0_ring_test_ring,
5249 	.test_ib = gfx_v9_0_ring_test_ib,
5250 	.insert_nop = amdgpu_ring_insert_nop,
5251 	.pad_ib = amdgpu_ring_generic_pad_ib,
5252 	.set_priority = gfx_v9_0_ring_set_priority_compute,
5253 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5254 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5255 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5256 };
5257 
5258 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5259 	.type = AMDGPU_RING_TYPE_KIQ,
5260 	.align_mask = 0xff,
5261 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5262 	.support_64bit_ptrs = true,
5263 	.vmhub = AMDGPU_GFXHUB,
5264 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5265 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5266 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5267 	.emit_frame_size =
5268 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5269 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5270 		5 + /* hdp invalidate */
5271 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5272 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5273 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5274 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5275 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5276 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5277 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5278 	.test_ring = gfx_v9_0_ring_test_ring,
5279 	.insert_nop = amdgpu_ring_insert_nop,
5280 	.pad_ib = amdgpu_ring_generic_pad_ib,
5281 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
5282 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5283 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5284 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5285 };
5286 
5287 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5288 {
5289 	int i;
5290 
5291 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5292 
5293 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5294 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5295 
5296 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5297 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5298 }
5299 
5300 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5301 	.set = gfx_v9_0_set_eop_interrupt_state,
5302 	.process = gfx_v9_0_eop_irq,
5303 };
5304 
5305 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5306 	.set = gfx_v9_0_set_priv_reg_fault_state,
5307 	.process = gfx_v9_0_priv_reg_irq,
5308 };
5309 
5310 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5311 	.set = gfx_v9_0_set_priv_inst_fault_state,
5312 	.process = gfx_v9_0_priv_inst_irq,
5313 };
5314 
5315 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5316 	.set = gfx_v9_0_set_cp_ecc_error_state,
5317 	.process = gfx_v9_0_cp_ecc_error_irq,
5318 };
5319 
5320 
5321 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5322 {
5323 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5324 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5325 
5326 	adev->gfx.priv_reg_irq.num_types = 1;
5327 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5328 
5329 	adev->gfx.priv_inst_irq.num_types = 1;
5330 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5331 
5332 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5333 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5334 }
5335 
5336 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5337 {
5338 	switch (adev->asic_type) {
5339 	case CHIP_VEGA10:
5340 	case CHIP_VEGA12:
5341 	case CHIP_VEGA20:
5342 	case CHIP_RAVEN:
5343 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5344 		break;
5345 	default:
5346 		break;
5347 	}
5348 }
5349 
5350 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5351 {
5352 	/* init asic gds info */
5353 	switch (adev->asic_type) {
5354 	case CHIP_VEGA10:
5355 	case CHIP_VEGA12:
5356 	case CHIP_VEGA20:
5357 		adev->gds.gds_size = 0x10000;
5358 		break;
5359 	case CHIP_RAVEN:
5360 		adev->gds.gds_size = 0x1000;
5361 		break;
5362 	default:
5363 		adev->gds.gds_size = 0x10000;
5364 		break;
5365 	}
5366 
5367 	switch (adev->asic_type) {
5368 	case CHIP_VEGA10:
5369 	case CHIP_VEGA20:
5370 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5371 		break;
5372 	case CHIP_VEGA12:
5373 		adev->gds.gds_compute_max_wave_id = 0x27f;
5374 		break;
5375 	case CHIP_RAVEN:
5376 		if (adev->rev_id >= 0x8)
5377 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5378 		else
5379 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5380 		break;
5381 	default:
5382 		/* this really depends on the chip */
5383 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5384 		break;
5385 	}
5386 
5387 	adev->gds.gws_size = 64;
5388 	adev->gds.oa_size = 16;
5389 }
5390 
5391 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5392 						 u32 bitmap)
5393 {
5394 	u32 data;
5395 
5396 	if (!bitmap)
5397 		return;
5398 
5399 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5400 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5401 
5402 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5403 }
5404 
5405 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5406 {
5407 	u32 data, mask;
5408 
5409 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5410 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5411 
5412 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5413 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5414 
5415 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5416 
5417 	return (~data) & mask;
5418 }
5419 
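/* Walk every SE/SH, apply the user CU disable masks, and fill cu_info
 * with the active CU bitmaps, the always-on CU mask and the total number
 * of active CUs.
 */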
5420 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5421 				 struct amdgpu_cu_info *cu_info)
5422 {
5423 	int i, j, k, counter, active_cu_number = 0;
5424 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5425 	unsigned disable_masks[4 * 2];
5426 
5427 	if (!adev || !cu_info)
5428 		return -EINVAL;
5429 
5430 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5431 
5432 	mutex_lock(&adev->grbm_idx_mutex);
5433 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5434 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5435 			mask = 1;
5436 			ao_bitmap = 0;
5437 			counter = 0;
5438 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5439 			if (i < 4 && j < 2)
5440 				gfx_v9_0_set_user_cu_inactive_bitmap(
5441 					adev, disable_masks[i * 2 + j]);
5442 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5443 			cu_info->bitmap[i][j] = bitmap;
5444 
5445 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5446 				if (bitmap & mask) {
5447 					if (counter < adev->gfx.config.max_cu_per_sh)
5448 						ao_bitmap |= mask;
5449 					counter++;
5450 				}
5451 				mask <<= 1;
5452 			}
5453 			active_cu_number += counter;
5454 			if (i < 2 && j < 2)
5455 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5456 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5457 		}
5458 	}
5459 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5460 	mutex_unlock(&adev->grbm_idx_mutex);
5461 
5462 	cu_info->number = active_cu_number;
5463 	cu_info->ao_cu_mask = ao_cu_mask;
5464 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5465 
5466 	return 0;
5467 }
5468 
5469 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5470 {
5471 	.type = AMD_IP_BLOCK_TYPE_GFX,
5472 	.major = 9,
5473 	.minor = 0,
5474 	.rev = 0,
5475 	.funcs = &gfx_v9_0_ip_funcs,
5476 };
5477