xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision e6f4c346)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41 
42 #include "soc15.h"
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69 
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76 
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83 
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90 
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98 
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106 
107 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
108 {
109 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
110 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
111 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
112 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
113 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
114 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
115 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
116 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
117 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
118 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
119 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
120 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
121 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
122 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
123 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
124 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
125 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
126 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
127 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
128 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
129 };
130 
131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
132 {
133 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
134 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
135 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
136 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
137 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
138 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
139 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
140 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
141 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
142 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
143 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
144 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
145 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
146 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
147 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
148 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
149 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
150 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
151 };
152 
153 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
154 {
155 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
156 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
157 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
158 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
159 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
160 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
161 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
162 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
163 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
164 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
165 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
166 };
167 
168 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
169 {
170 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
171 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
172 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
173 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
174 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
175 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
176 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
177 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
178 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
179 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
180 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
181 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
182 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
183 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
184 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
185 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
186 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
187 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
188 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
189 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
190 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
191 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
192 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
193 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
194 };
195 
196 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
197 {
198 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
199 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
200 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
201 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
202 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
203 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
204 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
205 };
206 
207 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
208 {
209 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
210 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
211 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
212 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
213 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
214 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
215 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
216 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
217 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
218 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
219 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
220 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
221 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
222 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
223 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
224 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
225 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
226 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
227 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
228 };
229 
230 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
231 {
232 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
233 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
234 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
235 };
236 
237 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
238 {
239 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
240 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
241 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
242 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
243 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
244 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
245 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
246 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
247 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
248 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
249 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
250 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
251 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
252 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
253 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
254 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
255 };
256 
257 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
258 {
259 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
260 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
261 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
262 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
263 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
264 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
265 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
266 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
267 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
268 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
269 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
270 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
271 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
272 };
273 
274 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
275 {
276 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
279 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
280 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
281 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
282 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
283 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
284 };
285 
286 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
287 {
288 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
291 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
292 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
293 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
294 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
295 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
296 };
297 
298 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
299 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
300 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
301 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
302 
303 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
304 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
306 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
307 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
308                                  struct amdgpu_cu_info *cu_info);
309 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
310 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
311 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
312 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
313 
314 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
315 {
316 	switch (adev->asic_type) {
317 	case CHIP_VEGA10:
318 		if (!amdgpu_virt_support_skip_setting(adev)) {
319 			soc15_program_register_sequence(adev,
320 							 golden_settings_gc_9_0,
321 							 ARRAY_SIZE(golden_settings_gc_9_0));
322 			soc15_program_register_sequence(adev,
323 							 golden_settings_gc_9_0_vg10,
324 							 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
325 		}
326 		break;
327 	case CHIP_VEGA12:
328 		soc15_program_register_sequence(adev,
329 						golden_settings_gc_9_2_1,
330 						ARRAY_SIZE(golden_settings_gc_9_2_1));
331 		soc15_program_register_sequence(adev,
332 						golden_settings_gc_9_2_1_vg12,
333 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
334 		break;
335 	case CHIP_VEGA20:
336 		soc15_program_register_sequence(adev,
337 						golden_settings_gc_9_0,
338 						ARRAY_SIZE(golden_settings_gc_9_0));
339 		soc15_program_register_sequence(adev,
340 						golden_settings_gc_9_0_vg20,
341 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
342 		break;
343 	case CHIP_RAVEN:
344 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
345 						ARRAY_SIZE(golden_settings_gc_9_1));
346 		if (adev->rev_id >= 8)
347 			soc15_program_register_sequence(adev,
348 							golden_settings_gc_9_1_rv2,
349 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
350 		else
351 			soc15_program_register_sequence(adev,
352 							golden_settings_gc_9_1_rv1,
353 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
354 		break;
355 	default:
356 		break;
357 	}
358 
359 	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
360 					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
361 }
362 
363 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
364 {
365 	adev->gfx.scratch.num_reg = 8;
366 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
367 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
368 }
369 
370 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
371 				       bool wc, uint32_t reg, uint32_t val)
372 {
373 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
374 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
375 				WRITE_DATA_DST_SEL(0) |
376 				(wc ? WR_CONFIRM : 0));
377 	amdgpu_ring_write(ring, reg);
378 	amdgpu_ring_write(ring, 0);
379 	amdgpu_ring_write(ring, val);
380 }
381 
382 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
383 				  int mem_space, int opt, uint32_t addr0,
384 				  uint32_t addr1, uint32_t ref, uint32_t mask,
385 				  uint32_t inv)
386 {
387 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
388 	amdgpu_ring_write(ring,
389 				 /* memory (1) or register (0) */
390 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
391 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
392 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
393 				 WAIT_REG_MEM_ENGINE(eng_sel)));
394 
395 	if (mem_space)
396 		BUG_ON(addr0 & 0x3); /* Dword align */
397 	amdgpu_ring_write(ring, addr0);
398 	amdgpu_ring_write(ring, addr1);
399 	amdgpu_ring_write(ring, ref);
400 	amdgpu_ring_write(ring, mask);
401 	amdgpu_ring_write(ring, inv); /* poll interval */
402 }
403 
404 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
405 {
406 	struct amdgpu_device *adev = ring->adev;
407 	uint32_t scratch;
408 	uint32_t tmp = 0;
409 	unsigned i;
410 	int r;
411 
412 	r = amdgpu_gfx_scratch_get(adev, &scratch);
413 	if (r)
414 		return r;
415 
416 	WREG32(scratch, 0xCAFEDEAD);
417 	r = amdgpu_ring_alloc(ring, 3);
418 	if (r)
419 		goto error_free_scratch;
420 
421 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
422 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
423 	amdgpu_ring_write(ring, 0xDEADBEEF);
424 	amdgpu_ring_commit(ring);
425 
426 	for (i = 0; i < adev->usec_timeout; i++) {
427 		tmp = RREG32(scratch);
428 		if (tmp == 0xDEADBEEF)
429 			break;
430 		udelay(1);
431 	}
432 
433 	if (i >= adev->usec_timeout)
434 		r = -ETIMEDOUT;
435 
436 error_free_scratch:
437 	amdgpu_gfx_scratch_free(adev, scratch);
438 	return r;
439 }
440 
441 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
442 {
443 	struct amdgpu_device *adev = ring->adev;
444 	struct amdgpu_ib ib;
445 	struct dma_fence *f = NULL;
446 
447 	unsigned index;
448 	uint64_t gpu_addr;
449 	uint32_t tmp;
450 	long r;
451 
452 	r = amdgpu_device_wb_get(adev, &index);
453 	if (r)
454 		return r;
455 
456 	gpu_addr = adev->wb.gpu_addr + (index * 4);
457 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
458 	memset(&ib, 0, sizeof(ib));
459 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
460 	if (r)
461 		goto err1;
462 
463 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
464 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
465 	ib.ptr[2] = lower_32_bits(gpu_addr);
466 	ib.ptr[3] = upper_32_bits(gpu_addr);
467 	ib.ptr[4] = 0xDEADBEEF;
468 	ib.length_dw = 5;
469 
470 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
471 	if (r)
472 		goto err2;
473 
474 	r = dma_fence_wait_timeout(f, false, timeout);
475 	if (r == 0) {
476 		r = -ETIMEDOUT;
477 		goto err2;
478 	} else if (r < 0) {
479 		goto err2;
480 	}
481 
482 	tmp = adev->wb.wb[index];
483 	if (tmp == 0xDEADBEEF)
484 		r = 0;
485 	else
486 		r = -EINVAL;
487 
488 err2:
489 	amdgpu_ib_free(adev, &ib, NULL);
490 	dma_fence_put(f);
491 err1:
492 	amdgpu_device_wb_free(adev, index);
493 	return r;
494 }
495 
496 
497 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
498 {
499 	release_firmware(adev->gfx.pfp_fw);
500 	adev->gfx.pfp_fw = NULL;
501 	release_firmware(adev->gfx.me_fw);
502 	adev->gfx.me_fw = NULL;
503 	release_firmware(adev->gfx.ce_fw);
504 	adev->gfx.ce_fw = NULL;
505 	release_firmware(adev->gfx.rlc_fw);
506 	adev->gfx.rlc_fw = NULL;
507 	release_firmware(adev->gfx.mec_fw);
508 	adev->gfx.mec_fw = NULL;
509 	release_firmware(adev->gfx.mec2_fw);
510 	adev->gfx.mec2_fw = NULL;
511 
512 	kfree(adev->gfx.rlc.register_list_format);
513 }
514 
515 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
516 {
517 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
518 
519 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
520 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
521 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
522 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
523 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
524 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
525 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
526 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
527 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
528 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
529 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
530 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
531 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
532 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
533 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
534 }
535 
536 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
537 {
538 	adev->gfx.me_fw_write_wait = false;
539 	adev->gfx.mec_fw_write_wait = false;
540 
541 	switch (adev->asic_type) {
542 	case CHIP_VEGA10:
543 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
544 		    (adev->gfx.me_feature_version >= 42) &&
545 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
546 		    (adev->gfx.pfp_feature_version >= 42))
547 			adev->gfx.me_fw_write_wait = true;
548 
549 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
550 		    (adev->gfx.mec_feature_version >= 42))
551 			adev->gfx.mec_fw_write_wait = true;
552 		break;
553 	case CHIP_VEGA12:
554 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
555 		    (adev->gfx.me_feature_version >= 44) &&
556 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
557 		    (adev->gfx.pfp_feature_version >= 44))
558 			adev->gfx.me_fw_write_wait = true;
559 
560 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
561 		    (adev->gfx.mec_feature_version >= 44))
562 			adev->gfx.mec_fw_write_wait = true;
563 		break;
564 	case CHIP_VEGA20:
565 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
566 		    (adev->gfx.me_feature_version >= 44) &&
567 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
568 		    (adev->gfx.pfp_feature_version >= 44))
569 			adev->gfx.me_fw_write_wait = true;
570 
571 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
572 		    (adev->gfx.mec_feature_version >= 44))
573 			adev->gfx.mec_fw_write_wait = true;
574 		break;
575 	case CHIP_RAVEN:
576 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
577 		    (adev->gfx.me_feature_version >= 42) &&
578 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
579 		    (adev->gfx.pfp_feature_version >= 42))
580 			adev->gfx.me_fw_write_wait = true;
581 
582 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
583 		    (adev->gfx.mec_feature_version >= 42))
584 			adev->gfx.mec_fw_write_wait = true;
585 		break;
586 	default:
587 		break;
588 	}
589 }
590 
591 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
592 {
593 	switch (adev->asic_type) {
594 	case CHIP_VEGA10:
595 	case CHIP_VEGA12:
596 	case CHIP_VEGA20:
597 		break;
598 	case CHIP_RAVEN:
599 		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
600 			break;
601 		if ((adev->gfx.rlc_fw_version != 106 &&
602 		     adev->gfx.rlc_fw_version < 531) ||
603 		    (adev->gfx.rlc_fw_version == 53815) ||
604 		    (adev->gfx.rlc_feature_version < 1) ||
605 		    !adev->gfx.rlc.is_rlc_v2_1)
606 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
607 		break;
608 	default:
609 		break;
610 	}
611 }
612 
613 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
614 {
615 	const char *chip_name;
616 	char fw_name[30];
617 	int err;
618 	struct amdgpu_firmware_info *info = NULL;
619 	const struct common_firmware_header *header = NULL;
620 	const struct gfx_firmware_header_v1_0 *cp_hdr;
621 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
622 	unsigned int *tmp = NULL;
623 	unsigned int i = 0;
624 	uint16_t version_major;
625 	uint16_t version_minor;
626 	uint32_t smu_version;
627 
628 	DRM_DEBUG("\n");
629 
630 	switch (adev->asic_type) {
631 	case CHIP_VEGA10:
632 		chip_name = "vega10";
633 		break;
634 	case CHIP_VEGA12:
635 		chip_name = "vega12";
636 		break;
637 	case CHIP_VEGA20:
638 		chip_name = "vega20";
639 		break;
640 	case CHIP_RAVEN:
641 		if (adev->rev_id >= 8)
642 			chip_name = "raven2";
643 		else if (adev->pdev->device == 0x15d8)
644 			chip_name = "picasso";
645 		else
646 			chip_name = "raven";
647 		break;
648 	default:
649 		BUG();
650 	}
651 
652 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
653 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
654 	if (err)
655 		goto out;
656 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
657 	if (err)
658 		goto out;
659 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
660 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
661 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
662 
663 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
664 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
665 	if (err)
666 		goto out;
667 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
668 	if (err)
669 		goto out;
670 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
671 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
672 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
673 
674 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
675 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
676 	if (err)
677 		goto out;
678 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
679 	if (err)
680 		goto out;
681 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
682 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
683 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
684 
685 	/*
686 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
687 	 * instead of picasso_rlc.bin.
688 	 * Judgment method:
689 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
690 	 *          or revision >= 0xD8 && revision <= 0xDF
691 	 * otherwise is PCO FP5
692 	 */
693 	if (!strcmp(chip_name, "picasso") &&
694 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
695 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
696 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
697 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
698 		(smu_version >= 0x41e2b))
699 		/**
700 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
701 		*/
702 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
703 	else
704 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
705 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
706 	if (err)
707 		goto out;
708 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
709 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
710 
711 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
712 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
713 	if (version_major == 2 && version_minor == 1)
714 		adev->gfx.rlc.is_rlc_v2_1 = true;
715 
716 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
717 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
718 	adev->gfx.rlc.save_and_restore_offset =
719 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
720 	adev->gfx.rlc.clear_state_descriptor_offset =
721 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
722 	adev->gfx.rlc.avail_scratch_ram_locations =
723 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
724 	adev->gfx.rlc.reg_restore_list_size =
725 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
726 	adev->gfx.rlc.reg_list_format_start =
727 			le32_to_cpu(rlc_hdr->reg_list_format_start);
728 	adev->gfx.rlc.reg_list_format_separate_start =
729 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
730 	adev->gfx.rlc.starting_offsets_start =
731 			le32_to_cpu(rlc_hdr->starting_offsets_start);
732 	adev->gfx.rlc.reg_list_format_size_bytes =
733 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
734 	adev->gfx.rlc.reg_list_size_bytes =
735 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
736 	adev->gfx.rlc.register_list_format =
737 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
738 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
739 	if (!adev->gfx.rlc.register_list_format) {
740 		err = -ENOMEM;
741 		goto out;
742 	}
743 
744 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
745 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
746 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
747 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
748 
749 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
750 
751 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
752 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
753 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
754 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
755 
756 	if (adev->gfx.rlc.is_rlc_v2_1)
757 		gfx_v9_0_init_rlc_ext_microcode(adev);
758 
759 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
760 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
761 	if (err)
762 		goto out;
763 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
764 	if (err)
765 		goto out;
766 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
767 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
768 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
769 
770 
771 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
772 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
773 	if (!err) {
774 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
775 		if (err)
776 			goto out;
777 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
778 		adev->gfx.mec2_fw->data;
779 		adev->gfx.mec2_fw_version =
780 		le32_to_cpu(cp_hdr->header.ucode_version);
781 		adev->gfx.mec2_feature_version =
782 		le32_to_cpu(cp_hdr->ucode_feature_version);
783 	} else {
784 		err = 0;
785 		adev->gfx.mec2_fw = NULL;
786 	}
787 
788 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
789 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
790 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
791 		info->fw = adev->gfx.pfp_fw;
792 		header = (const struct common_firmware_header *)info->fw->data;
793 		adev->firmware.fw_size +=
794 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
795 
796 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
797 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
798 		info->fw = adev->gfx.me_fw;
799 		header = (const struct common_firmware_header *)info->fw->data;
800 		adev->firmware.fw_size +=
801 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
802 
803 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
804 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
805 		info->fw = adev->gfx.ce_fw;
806 		header = (const struct common_firmware_header *)info->fw->data;
807 		adev->firmware.fw_size +=
808 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
809 
810 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
811 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
812 		info->fw = adev->gfx.rlc_fw;
813 		header = (const struct common_firmware_header *)info->fw->data;
814 		adev->firmware.fw_size +=
815 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
816 
817 		if (adev->gfx.rlc.is_rlc_v2_1 &&
818 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
819 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
820 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
821 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
822 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
823 			info->fw = adev->gfx.rlc_fw;
824 			adev->firmware.fw_size +=
825 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
826 
827 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
828 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
829 			info->fw = adev->gfx.rlc_fw;
830 			adev->firmware.fw_size +=
831 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
832 
833 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
834 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
835 			info->fw = adev->gfx.rlc_fw;
836 			adev->firmware.fw_size +=
837 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
838 		}
839 
840 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
841 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
842 		info->fw = adev->gfx.mec_fw;
843 		header = (const struct common_firmware_header *)info->fw->data;
844 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
845 		adev->firmware.fw_size +=
846 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
847 
848 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
849 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
850 		info->fw = adev->gfx.mec_fw;
851 		adev->firmware.fw_size +=
852 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
853 
854 		if (adev->gfx.mec2_fw) {
855 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
856 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
857 			info->fw = adev->gfx.mec2_fw;
858 			header = (const struct common_firmware_header *)info->fw->data;
859 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
860 			adev->firmware.fw_size +=
861 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
862 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
863 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
864 			info->fw = adev->gfx.mec2_fw;
865 			adev->firmware.fw_size +=
866 				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
867 		}
868 
869 	}
870 
871 out:
872 	gfx_v9_0_check_if_need_gfxoff(adev);
873 	gfx_v9_0_check_fw_write_wait(adev);
874 	if (err) {
875 		dev_err(adev->dev,
876 			"gfx9: Failed to load firmware \"%s\"\n",
877 			fw_name);
878 		release_firmware(adev->gfx.pfp_fw);
879 		adev->gfx.pfp_fw = NULL;
880 		release_firmware(adev->gfx.me_fw);
881 		adev->gfx.me_fw = NULL;
882 		release_firmware(adev->gfx.ce_fw);
883 		adev->gfx.ce_fw = NULL;
884 		release_firmware(adev->gfx.rlc_fw);
885 		adev->gfx.rlc_fw = NULL;
886 		release_firmware(adev->gfx.mec_fw);
887 		adev->gfx.mec_fw = NULL;
888 		release_firmware(adev->gfx.mec2_fw);
889 		adev->gfx.mec2_fw = NULL;
890 	}
891 	return err;
892 }
893 
894 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
895 {
896 	u32 count = 0;
897 	const struct cs_section_def *sect = NULL;
898 	const struct cs_extent_def *ext = NULL;
899 
900 	/* begin clear state */
901 	count += 2;
902 	/* context control state */
903 	count += 3;
904 
905 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
906 		for (ext = sect->section; ext->extent != NULL; ++ext) {
907 			if (sect->id == SECT_CONTEXT)
908 				count += 2 + ext->reg_count;
909 			else
910 				return 0;
911 		}
912 	}
913 
914 	/* end clear state */
915 	count += 2;
916 	/* clear state */
917 	count += 2;
918 
919 	return count;
920 }
921 
922 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
923 				    volatile u32 *buffer)
924 {
925 	u32 count = 0, i;
926 	const struct cs_section_def *sect = NULL;
927 	const struct cs_extent_def *ext = NULL;
928 
929 	if (adev->gfx.rlc.cs_data == NULL)
930 		return;
931 	if (buffer == NULL)
932 		return;
933 
934 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
935 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
936 
937 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
938 	buffer[count++] = cpu_to_le32(0x80000000);
939 	buffer[count++] = cpu_to_le32(0x80000000);
940 
941 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
942 		for (ext = sect->section; ext->extent != NULL; ++ext) {
943 			if (sect->id == SECT_CONTEXT) {
944 				buffer[count++] =
945 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
946 				buffer[count++] = cpu_to_le32(ext->reg_index -
947 						PACKET3_SET_CONTEXT_REG_START);
948 				for (i = 0; i < ext->reg_count; i++)
949 					buffer[count++] = cpu_to_le32(ext->extent[i]);
950 			} else {
951 				return;
952 			}
953 		}
954 	}
955 
956 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
957 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
958 
959 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
960 	buffer[count++] = cpu_to_le32(0);
961 }
962 
963 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
964 {
965 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
966 	uint32_t pg_always_on_cu_num = 2;
967 	uint32_t always_on_cu_num;
968 	uint32_t i, j, k;
969 	uint32_t mask, cu_bitmap, counter;
970 
971 	if (adev->flags & AMD_IS_APU)
972 		always_on_cu_num = 4;
973 	else if (adev->asic_type == CHIP_VEGA12)
974 		always_on_cu_num = 8;
975 	else
976 		always_on_cu_num = 12;
977 
978 	mutex_lock(&adev->grbm_idx_mutex);
979 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
980 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
981 			mask = 1;
982 			cu_bitmap = 0;
983 			counter = 0;
984 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
985 
986 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
987 				if (cu_info->bitmap[i][j] & mask) {
988 					if (counter == pg_always_on_cu_num)
989 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
990 					if (counter < always_on_cu_num)
991 						cu_bitmap |= mask;
992 					else
993 						break;
994 					counter++;
995 				}
996 				mask <<= 1;
997 			}
998 
999 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1000 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1001 		}
1002 	}
1003 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1004 	mutex_unlock(&adev->grbm_idx_mutex);
1005 }
1006 
1007 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1008 {
1009 	uint32_t data;
1010 
1011 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1012 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1013 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1014 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1015 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1016 
1017 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1018 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1019 
1020 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1021 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1022 
1023 	mutex_lock(&adev->grbm_idx_mutex);
1024 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1025 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1026 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1027 
1028 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1029 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1030 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1031 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1032 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1033 
1034 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1035 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1036 	data &= 0x0000FFFF;
1037 	data |= 0x00C00000;
1038 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1039 
1040 	/*
1041 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1042 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1043 	 */
1044 
1045 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1046 	 * but used for RLC_LB_CNTL configuration */
1047 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1048 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1049 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1050 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1051 	mutex_unlock(&adev->grbm_idx_mutex);
1052 
1053 	gfx_v9_0_init_always_on_cu_mask(adev);
1054 }
1055 
1056 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1057 {
1058 	uint32_t data;
1059 
1060 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1061 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1062 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1063 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1064 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1065 
1066 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1067 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1068 
1069 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1070 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1071 
1072 	mutex_lock(&adev->grbm_idx_mutex);
1073 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1074 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1075 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1076 
1077 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1078 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1079 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1080 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1081 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1082 
1083 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1084 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1085 	data &= 0x0000FFFF;
1086 	data |= 0x00C00000;
1087 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1088 
1089 	/*
1090 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1091 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1092 	 */
1093 
1094 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1095 	 * but used for RLC_LB_CNTL configuration */
1096 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1097 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1098 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1099 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1100 	mutex_unlock(&adev->grbm_idx_mutex);
1101 
1102 	gfx_v9_0_init_always_on_cu_mask(adev);
1103 }
1104 
1105 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1106 {
1107 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1108 }
1109 
1110 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1111 {
1112 	return 5;
1113 }
1114 
1115 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1116 {
1117 	const struct cs_section_def *cs_data;
1118 	int r;
1119 
1120 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1121 
1122 	cs_data = adev->gfx.rlc.cs_data;
1123 
1124 	if (cs_data) {
1125 		/* init clear state block */
1126 		r = amdgpu_gfx_rlc_init_csb(adev);
1127 		if (r)
1128 			return r;
1129 	}
1130 
1131 	if (adev->asic_type == CHIP_RAVEN) {
1132 		/* TODO: double check the cp_table_size for RV */
1133 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1134 		r = amdgpu_gfx_rlc_init_cpt(adev);
1135 		if (r)
1136 			return r;
1137 	}
1138 
1139 	switch (adev->asic_type) {
1140 	case CHIP_RAVEN:
1141 		gfx_v9_0_init_lbpw(adev);
1142 		break;
1143 	case CHIP_VEGA20:
1144 		gfx_v9_4_init_lbpw(adev);
1145 		break;
1146 	default:
1147 		break;
1148 	}
1149 
1150 	return 0;
1151 }
1152 
1153 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1154 {
1155 	int r;
1156 
1157 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1158 	if (unlikely(r != 0))
1159 		return r;
1160 
1161 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1162 			AMDGPU_GEM_DOMAIN_VRAM);
1163 	if (!r)
1164 		adev->gfx.rlc.clear_state_gpu_addr =
1165 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1166 
1167 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1168 
1169 	return r;
1170 }
1171 
1172 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1173 {
1174 	int r;
1175 
1176 	if (!adev->gfx.rlc.clear_state_obj)
1177 		return;
1178 
1179 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1180 	if (likely(r == 0)) {
1181 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1182 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1183 	}
1184 }
1185 
1186 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1187 {
1188 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1189 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1190 }
1191 
1192 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1193 {
1194 	int r;
1195 	u32 *hpd;
1196 	const __le32 *fw_data;
1197 	unsigned fw_size;
1198 	u32 *fw;
1199 	size_t mec_hpd_size;
1200 
1201 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1202 
1203 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1204 
1205 	/* take ownership of the relevant compute queues */
1206 	amdgpu_gfx_compute_queue_acquire(adev);
1207 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1208 
1209 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1210 				      AMDGPU_GEM_DOMAIN_VRAM,
1211 				      &adev->gfx.mec.hpd_eop_obj,
1212 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1213 				      (void **)&hpd);
1214 	if (r) {
1215 		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1216 		gfx_v9_0_mec_fini(adev);
1217 		return r;
1218 	}
1219 
1220 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1221 
1222 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1223 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1224 
1225 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1226 
1227 	fw_data = (const __le32 *)
1228 		(adev->gfx.mec_fw->data +
1229 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1230 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1231 
1232 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1233 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1234 				      &adev->gfx.mec.mec_fw_obj,
1235 				      &adev->gfx.mec.mec_fw_gpu_addr,
1236 				      (void **)&fw);
1237 	if (r) {
1238 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1239 		gfx_v9_0_mec_fini(adev);
1240 		return r;
1241 	}
1242 
1243 	memcpy(fw, fw_data, fw_size);
1244 
1245 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1246 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1247 
1248 	return 0;
1249 }
1250 
1251 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1252 {
1253 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1254 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1255 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1256 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1257 		(SQ_IND_INDEX__FORCE_READ_MASK));
1258 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1259 }
1260 
1261 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1262 			   uint32_t wave, uint32_t thread,
1263 			   uint32_t regno, uint32_t num, uint32_t *out)
1264 {
1265 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1266 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1267 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1268 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1269 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1270 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1271 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1272 	while (num--)
1273 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1274 }
1275 
1276 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1277 {
1278 	/* type 1 wave data */
1279 	dst[(*no_fields)++] = 1;
1280 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1281 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1282 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1283 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1284 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1285 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1286 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1287 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1288 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1289 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1290 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1291 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1292 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1293 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1294 }
1295 
1296 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1297 				     uint32_t wave, uint32_t start,
1298 				     uint32_t size, uint32_t *dst)
1299 {
1300 	wave_read_regs(
1301 		adev, simd, wave, 0,
1302 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1303 }
1304 
1305 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1306 				     uint32_t wave, uint32_t thread,
1307 				     uint32_t start, uint32_t size,
1308 				     uint32_t *dst)
1309 {
1310 	wave_read_regs(
1311 		adev, simd, wave, thread,
1312 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1313 }
1314 
1315 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1316 				  u32 me, u32 pipe, u32 q, u32 vm)
1317 {
1318 	soc15_grbm_select(adev, me, pipe, q, vm);
1319 }
1320 
1321 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1322 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1323 	.select_se_sh = &gfx_v9_0_select_se_sh,
1324 	.read_wave_data = &gfx_v9_0_read_wave_data,
1325 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1326 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1327 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1328 };
1329 
1330 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1331 {
1332 	u32 gb_addr_config;
1333 	int err;
1334 
1335 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1336 
1337 	switch (adev->asic_type) {
1338 	case CHIP_VEGA10:
1339 		adev->gfx.config.max_hw_contexts = 8;
1340 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1344 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1345 		break;
1346 	case CHIP_VEGA12:
1347 		adev->gfx.config.max_hw_contexts = 8;
1348 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1349 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1350 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1351 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1352 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1353 		DRM_INFO("fix gfx.config for vega12\n");
1354 		break;
1355 	case CHIP_VEGA20:
1356 		adev->gfx.config.max_hw_contexts = 8;
1357 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1358 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1359 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1360 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1361 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1362 		gb_addr_config &= ~0xf3e777ff;
1363 		gb_addr_config |= 0x22014042;
1364 		/* check vbios table if gpu info is not available */
1365 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1366 		if (err)
1367 			return err;
1368 		break;
1369 	case CHIP_RAVEN:
1370 		adev->gfx.config.max_hw_contexts = 8;
1371 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1372 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1373 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1374 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1375 		if (adev->rev_id >= 8)
1376 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1377 		else
1378 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1379 		break;
1380 	default:
1381 		BUG();
1382 		break;
1383 	}
1384 
1385 	adev->gfx.config.gb_addr_config = gb_addr_config;
1386 
1387 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1388 			REG_GET_FIELD(
1389 					adev->gfx.config.gb_addr_config,
1390 					GB_ADDR_CONFIG,
1391 					NUM_PIPES);
1392 
1393 	adev->gfx.config.max_tile_pipes =
1394 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1395 
1396 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1397 			REG_GET_FIELD(
1398 					adev->gfx.config.gb_addr_config,
1399 					GB_ADDR_CONFIG,
1400 					NUM_BANKS);
1401 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1402 			REG_GET_FIELD(
1403 					adev->gfx.config.gb_addr_config,
1404 					GB_ADDR_CONFIG,
1405 					MAX_COMPRESSED_FRAGS);
1406 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1407 			REG_GET_FIELD(
1408 					adev->gfx.config.gb_addr_config,
1409 					GB_ADDR_CONFIG,
1410 					NUM_RB_PER_SE);
1411 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1412 			REG_GET_FIELD(
1413 					adev->gfx.config.gb_addr_config,
1414 					GB_ADDR_CONFIG,
1415 					NUM_SHADER_ENGINES);
1416 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1417 			REG_GET_FIELD(
1418 					adev->gfx.config.gb_addr_config,
1419 					GB_ADDR_CONFIG,
1420 					PIPE_INTERLEAVE_SIZE));
1421 
1422 	return 0;
1423 }
1424 
1425 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1426 				   struct amdgpu_ngg_buf *ngg_buf,
1427 				   int size_se,
1428 				   int default_size_se)
1429 {
1430 	int r;
1431 
1432 	if (size_se < 0) {
1433 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1434 		return -EINVAL;
1435 	}
1436 	size_se = size_se ? size_se : default_size_se;
1437 
1438 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1439 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1440 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1441 				    &ngg_buf->bo,
1442 				    &ngg_buf->gpu_addr,
1443 				    NULL);
1444 	if (r) {
1445 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1446 		return r;
1447 	}
1448 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1449 
1450 	return r;
1451 }
1452 
1453 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1454 {
1455 	int i;
1456 
1457 	for (i = 0; i < NGG_BUF_MAX; i++)
1458 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1459 				      &adev->gfx.ngg.buf[i].gpu_addr,
1460 				      NULL);
1461 
1462 	memset(&adev->gfx.ngg.buf[0], 0,
1463 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1464 
1465 	adev->gfx.ngg.init = false;
1466 
1467 	return 0;
1468 }
1469 
1470 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1471 {
1472 	int r;
1473 
1474 	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1475 		return 0;
1476 
1477 	/* GDS reserve memory: 64 bytes alignment */
1478 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1479 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1480 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1481 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1482 
1483 	/* Primitive Buffer */
1484 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1485 				    amdgpu_prim_buf_per_se,
1486 				    64 * 1024);
1487 	if (r) {
1488 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1489 		goto err;
1490 	}
1491 
1492 	/* Position Buffer */
1493 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1494 				    amdgpu_pos_buf_per_se,
1495 				    256 * 1024);
1496 	if (r) {
1497 		dev_err(adev->dev, "Failed to create Position Buffer\n");
1498 		goto err;
1499 	}
1500 
1501 	/* Control Sideband */
1502 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1503 				    amdgpu_cntl_sb_buf_per_se,
1504 				    256);
1505 	if (r) {
1506 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1507 		goto err;
1508 	}
1509 
1510 	/* Parameter Cache, not created by default */
1511 	if (amdgpu_param_buf_per_se <= 0)
1512 		goto out;
1513 
1514 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1515 				    amdgpu_param_buf_per_se,
1516 				    512 * 1024);
1517 	if (r) {
1518 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
1519 		goto err;
1520 	}
1521 
1522 out:
1523 	adev->gfx.ngg.init = true;
1524 	return 0;
1525 err:
1526 	gfx_v9_0_ngg_fini(adev);
1527 	return r;
1528 }
1529 
1530 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1531 {
1532 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1533 	int r;
1534 	u32 data, base;
1535 
1536 	if (!amdgpu_ngg)
1537 		return 0;
1538 
1539 	/* Program buffer size */
1540 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1541 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1542 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1543 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
1544 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1545 
1546 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1547 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1548 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1549 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1550 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1551 
1552 	/* Program buffer base address */
1553 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1554 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1555 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1556 
1557 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1558 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1559 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1560 
1561 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1562 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1563 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1564 
1565 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1566 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1567 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1568 
1569 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1570 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1571 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1572 
1573 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1574 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1575 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1576 
1577 	/* Clear GDS reserved memory */
1578 	r = amdgpu_ring_alloc(ring, 17);
1579 	if (r) {
1580 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1581 			  ring->name, r);
1582 		return r;
1583 	}
1584 
1585 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1586 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1587 			           (adev->gds.gds_size +
1588 				    adev->gfx.ngg.gds_reserve_size));
1589 
1590 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1591 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1592 				PACKET3_DMA_DATA_DST_SEL(1) |
1593 				PACKET3_DMA_DATA_SRC_SEL(2)));
1594 	amdgpu_ring_write(ring, 0);
1595 	amdgpu_ring_write(ring, 0);
1596 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1597 	amdgpu_ring_write(ring, 0);
1598 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1599 				adev->gfx.ngg.gds_reserve_size);
1600 
1601 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1602 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1603 
1604 	amdgpu_ring_commit(ring);
1605 
1606 	return 0;
1607 }
1608 
1609 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1610 				      int mec, int pipe, int queue)
1611 {
1612 	int r;
1613 	unsigned irq_type;
1614 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1615 
1616 	ring = &adev->gfx.compute_ring[ring_id];
1617 
1618 	/* mec0 is me1 */
1619 	ring->me = mec + 1;
1620 	ring->pipe = pipe;
1621 	ring->queue = queue;
1622 
1623 	ring->ring_obj = NULL;
1624 	ring->use_doorbell = true;
1625 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1626 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1627 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1628 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1629 
1630 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1631 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1632 		+ ring->pipe;
1633 
1634 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1635 	r = amdgpu_ring_init(adev, ring, 1024,
1636 			     &adev->gfx.eop_irq, irq_type);
1637 	if (r)
1638 		return r;
1639 
1640 
1641 	return 0;
1642 }
1643 
1644 static int gfx_v9_0_sw_init(void *handle)
1645 {
1646 	int i, j, k, r, ring_id;
1647 	struct amdgpu_ring *ring;
1648 	struct amdgpu_kiq *kiq;
1649 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1650 
1651 	switch (adev->asic_type) {
1652 	case CHIP_VEGA10:
1653 	case CHIP_VEGA12:
1654 	case CHIP_VEGA20:
1655 	case CHIP_RAVEN:
1656 		adev->gfx.mec.num_mec = 2;
1657 		break;
1658 	default:
1659 		adev->gfx.mec.num_mec = 1;
1660 		break;
1661 	}
1662 
1663 	adev->gfx.mec.num_pipe_per_mec = 4;
1664 	adev->gfx.mec.num_queue_per_pipe = 8;
1665 
1666 	/* EOP Event */
1667 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1668 	if (r)
1669 		return r;
1670 
1671 	/* Privileged reg */
1672 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1673 			      &adev->gfx.priv_reg_irq);
1674 	if (r)
1675 		return r;
1676 
1677 	/* Privileged inst */
1678 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1679 			      &adev->gfx.priv_inst_irq);
1680 	if (r)
1681 		return r;
1682 
1683 	/* ECC error */
1684 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1685 			      &adev->gfx.cp_ecc_error_irq);
1686 	if (r)
1687 		return r;
1688 
1689 	/* FUE error */
1690 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1691 			      &adev->gfx.cp_ecc_error_irq);
1692 	if (r)
1693 		return r;
1694 
1695 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1696 
1697 	gfx_v9_0_scratch_init(adev);
1698 
1699 	r = gfx_v9_0_init_microcode(adev);
1700 	if (r) {
1701 		DRM_ERROR("Failed to load gfx firmware!\n");
1702 		return r;
1703 	}
1704 
1705 	r = adev->gfx.rlc.funcs->init(adev);
1706 	if (r) {
1707 		DRM_ERROR("Failed to init rlc BOs!\n");
1708 		return r;
1709 	}
1710 
1711 	r = gfx_v9_0_mec_init(adev);
1712 	if (r) {
1713 		DRM_ERROR("Failed to init MEC BOs!\n");
1714 		return r;
1715 	}
1716 
1717 	/* set up the gfx ring */
1718 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1719 		ring = &adev->gfx.gfx_ring[i];
1720 		ring->ring_obj = NULL;
1721 		if (!i)
1722 			sprintf(ring->name, "gfx");
1723 		else
1724 			sprintf(ring->name, "gfx_%d", i);
1725 		ring->use_doorbell = true;
1726 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1727 		r = amdgpu_ring_init(adev, ring, 1024,
1728 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1729 		if (r)
1730 			return r;
1731 	}
1732 
1733 	/* set up the compute queues - allocate horizontally across pipes */
1734 	ring_id = 0;
1735 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1736 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1737 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1738 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1739 					continue;
1740 
1741 				r = gfx_v9_0_compute_ring_init(adev,
1742 							       ring_id,
1743 							       i, k, j);
1744 				if (r)
1745 					return r;
1746 
1747 				ring_id++;
1748 			}
1749 		}
1750 	}
1751 
1752 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1753 	if (r) {
1754 		DRM_ERROR("Failed to init KIQ BOs!\n");
1755 		return r;
1756 	}
1757 
1758 	kiq = &adev->gfx.kiq;
1759 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1760 	if (r)
1761 		return r;
1762 
1763 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
1764 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1765 	if (r)
1766 		return r;
1767 
1768 	adev->gfx.ce_ram_size = 0x8000;
1769 
1770 	r = gfx_v9_0_gpu_early_init(adev);
1771 	if (r)
1772 		return r;
1773 
1774 	r = gfx_v9_0_ngg_init(adev);
1775 	if (r)
1776 		return r;
1777 
1778 	return 0;
1779 }
1780 
1781 
1782 static int gfx_v9_0_sw_fini(void *handle)
1783 {
1784 	int i;
1785 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1786 
1787 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1788 			adev->gfx.ras_if) {
1789 		struct ras_common_if *ras_if = adev->gfx.ras_if;
1790 		struct ras_ih_if ih_info = {
1791 			.head = *ras_if,
1792 		};
1793 
1794 		amdgpu_ras_debugfs_remove(adev, ras_if);
1795 		amdgpu_ras_sysfs_remove(adev, ras_if);
1796 		amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
1797 		amdgpu_ras_feature_enable(adev, ras_if, 0);
1798 		kfree(ras_if);
1799 	}
1800 
1801 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1802 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1803 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1804 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1805 
1806 	amdgpu_gfx_mqd_sw_fini(adev);
1807 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1808 	amdgpu_gfx_kiq_fini(adev);
1809 
1810 	gfx_v9_0_mec_fini(adev);
1811 	gfx_v9_0_ngg_fini(adev);
1812 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1813 	if (adev->asic_type == CHIP_RAVEN) {
1814 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1815 				&adev->gfx.rlc.cp_table_gpu_addr,
1816 				(void **)&adev->gfx.rlc.cp_table_ptr);
1817 	}
1818 	gfx_v9_0_free_microcode(adev);
1819 
1820 	return 0;
1821 }
1822 
1823 
1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1825 {
1826 	/* TODO */
1827 }
1828 
1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1830 {
1831 	u32 data;
1832 
1833 	if (instance == 0xffffffff)
1834 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1835 	else
1836 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1837 
1838 	if (se_num == 0xffffffff)
1839 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1840 	else
1841 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1842 
1843 	if (sh_num == 0xffffffff)
1844 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1845 	else
1846 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1847 
1848 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1849 }
1850 
1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1852 {
1853 	u32 data, mask;
1854 
1855 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1856 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1857 
1858 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1859 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1860 
1861 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1862 					 adev->gfx.config.max_sh_per_se);
1863 
1864 	return (~data) & mask;
1865 }
1866 
1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1868 {
1869 	int i, j;
1870 	u32 data;
1871 	u32 active_rbs = 0;
1872 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1873 					adev->gfx.config.max_sh_per_se;
1874 
1875 	mutex_lock(&adev->grbm_idx_mutex);
1876 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1877 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1878 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1879 			data = gfx_v9_0_get_rb_active_bitmap(adev);
1880 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1881 					       rb_bitmap_width_per_sh);
1882 		}
1883 	}
1884 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1885 	mutex_unlock(&adev->grbm_idx_mutex);
1886 
1887 	adev->gfx.config.backend_enable_mask = active_rbs;
1888 	adev->gfx.config.num_rbs = hweight32(active_rbs);
1889 }
1890 
1891 #define DEFAULT_SH_MEM_BASES	(0x6000)
1892 #define FIRST_COMPUTE_VMID	(8)
1893 #define LAST_COMPUTE_VMID	(16)
1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1895 {
1896 	int i;
1897 	uint32_t sh_mem_config;
1898 	uint32_t sh_mem_bases;
1899 
1900 	/*
1901 	 * Configure apertures:
1902 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1903 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1904 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1905 	 */
1906 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1907 
1908 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1909 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1910 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1911 
1912 	mutex_lock(&adev->srbm_mutex);
1913 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1914 		soc15_grbm_select(adev, 0, 0, 0, i);
1915 		/* CP and shaders */
1916 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1917 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1918 	}
1919 	soc15_grbm_select(adev, 0, 0, 0, 0);
1920 	mutex_unlock(&adev->srbm_mutex);
1921 
1922 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
1923 	   acccess. These should be enabled by FW for target VMIDs. */
1924 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1925 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
1926 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
1927 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
1928 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
1929 	}
1930 }
1931 
1932 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1933 {
1934 	u32 tmp;
1935 	int i;
1936 
1937 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1938 
1939 	gfx_v9_0_tiling_mode_table_init(adev);
1940 
1941 	gfx_v9_0_setup_rb(adev);
1942 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1943 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1944 
1945 	/* XXX SH_MEM regs */
1946 	/* where to put LDS, scratch, GPUVM in FSA64 space */
1947 	mutex_lock(&adev->srbm_mutex);
1948 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1949 		soc15_grbm_select(adev, 0, 0, 0, i);
1950 		/* CP and shaders */
1951 		if (i == 0) {
1952 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1953 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1954 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1955 					    !!amdgpu_noretry);
1956 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1957 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1958 		} else {
1959 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1960 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1961 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1962 					    !!amdgpu_noretry);
1963 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1964 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1965 				(adev->gmc.private_aperture_start >> 48));
1966 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1967 				(adev->gmc.shared_aperture_start >> 48));
1968 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1969 		}
1970 	}
1971 	soc15_grbm_select(adev, 0, 0, 0, 0);
1972 
1973 	mutex_unlock(&adev->srbm_mutex);
1974 
1975 	gfx_v9_0_init_compute_vmid(adev);
1976 }
1977 
1978 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1979 {
1980 	u32 i, j, k;
1981 	u32 mask;
1982 
1983 	mutex_lock(&adev->grbm_idx_mutex);
1984 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1985 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1986 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1987 			for (k = 0; k < adev->usec_timeout; k++) {
1988 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1989 					break;
1990 				udelay(1);
1991 			}
1992 			if (k == adev->usec_timeout) {
1993 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
1994 						      0xffffffff, 0xffffffff);
1995 				mutex_unlock(&adev->grbm_idx_mutex);
1996 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1997 					 i, j);
1998 				return;
1999 			}
2000 		}
2001 	}
2002 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2003 	mutex_unlock(&adev->grbm_idx_mutex);
2004 
2005 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2006 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2007 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2008 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2009 	for (k = 0; k < adev->usec_timeout; k++) {
2010 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2011 			break;
2012 		udelay(1);
2013 	}
2014 }
2015 
2016 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2017 					       bool enable)
2018 {
2019 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2020 
2021 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2022 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2023 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2024 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2025 
2026 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2027 }
2028 
2029 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2030 {
2031 	/* csib */
2032 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2033 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2034 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2035 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2036 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2037 			adev->gfx.rlc.clear_state_size);
2038 }
2039 
2040 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2041 				int indirect_offset,
2042 				int list_size,
2043 				int *unique_indirect_regs,
2044 				int unique_indirect_reg_count,
2045 				int *indirect_start_offsets,
2046 				int *indirect_start_offsets_count,
2047 				int max_start_offsets_count)
2048 {
2049 	int idx;
2050 
2051 	for (; indirect_offset < list_size; indirect_offset++) {
2052 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2053 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2054 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2055 
2056 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2057 			indirect_offset += 2;
2058 
2059 			/* look for the matching indice */
2060 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2061 				if (unique_indirect_regs[idx] ==
2062 					register_list_format[indirect_offset] ||
2063 					!unique_indirect_regs[idx])
2064 					break;
2065 			}
2066 
2067 			BUG_ON(idx >= unique_indirect_reg_count);
2068 
2069 			if (!unique_indirect_regs[idx])
2070 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2071 
2072 			indirect_offset++;
2073 		}
2074 	}
2075 }
2076 
2077 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2078 {
2079 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2080 	int unique_indirect_reg_count = 0;
2081 
2082 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2083 	int indirect_start_offsets_count = 0;
2084 
2085 	int list_size = 0;
2086 	int i = 0, j = 0;
2087 	u32 tmp = 0;
2088 
2089 	u32 *register_list_format =
2090 		kmemdup(adev->gfx.rlc.register_list_format,
2091 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2092 	if (!register_list_format)
2093 		return -ENOMEM;
2094 
2095 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2096 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2097 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2098 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2099 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2100 				    unique_indirect_regs,
2101 				    unique_indirect_reg_count,
2102 				    indirect_start_offsets,
2103 				    &indirect_start_offsets_count,
2104 				    ARRAY_SIZE(indirect_start_offsets));
2105 
2106 	/* enable auto inc in case it is disabled */
2107 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2108 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2109 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2110 
2111 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2112 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2113 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2114 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2115 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2116 			adev->gfx.rlc.register_restore[i]);
2117 
2118 	/* load indirect register */
2119 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2120 		adev->gfx.rlc.reg_list_format_start);
2121 
2122 	/* direct register portion */
2123 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2124 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2125 			register_list_format[i]);
2126 
2127 	/* indirect register portion */
2128 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2129 		if (register_list_format[i] == 0xFFFFFFFF) {
2130 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2131 			continue;
2132 		}
2133 
2134 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2135 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2136 
2137 		for (j = 0; j < unique_indirect_reg_count; j++) {
2138 			if (register_list_format[i] == unique_indirect_regs[j]) {
2139 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2140 				break;
2141 			}
2142 		}
2143 
2144 		BUG_ON(j >= unique_indirect_reg_count);
2145 
2146 		i++;
2147 	}
2148 
2149 	/* set save/restore list size */
2150 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2151 	list_size = list_size >> 1;
2152 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2153 		adev->gfx.rlc.reg_restore_list_size);
2154 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2155 
2156 	/* write the starting offsets to RLC scratch ram */
2157 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2158 		adev->gfx.rlc.starting_offsets_start);
2159 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2160 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2161 		       indirect_start_offsets[i]);
2162 
2163 	/* load unique indirect regs*/
2164 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2165 		if (unique_indirect_regs[i] != 0) {
2166 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2167 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2168 			       unique_indirect_regs[i] & 0x3FFFF);
2169 
2170 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2171 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2172 			       unique_indirect_regs[i] >> 20);
2173 		}
2174 	}
2175 
2176 	kfree(register_list_format);
2177 	return 0;
2178 }
2179 
2180 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2181 {
2182 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2183 }
2184 
2185 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2186 					     bool enable)
2187 {
2188 	uint32_t data = 0;
2189 	uint32_t default_data = 0;
2190 
2191 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2192 	if (enable == true) {
2193 		/* enable GFXIP control over CGPG */
2194 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2195 		if(default_data != data)
2196 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2197 
2198 		/* update status */
2199 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2200 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2201 		if(default_data != data)
2202 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2203 	} else {
2204 		/* restore GFXIP control over GCPG */
2205 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2206 		if(default_data != data)
2207 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2208 	}
2209 }
2210 
2211 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2212 {
2213 	uint32_t data = 0;
2214 
2215 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2216 			      AMD_PG_SUPPORT_GFX_SMG |
2217 			      AMD_PG_SUPPORT_GFX_DMG)) {
2218 		/* init IDLE_POLL_COUNT = 60 */
2219 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2220 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2221 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2222 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2223 
2224 		/* init RLC PG Delay */
2225 		data = 0;
2226 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2227 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2228 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2229 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2230 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2231 
2232 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2233 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2234 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2235 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2236 
2237 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2238 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2239 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2240 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2241 
2242 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2243 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2244 
2245 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2246 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2247 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2248 
2249 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2250 	}
2251 }
2252 
2253 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2254 						bool enable)
2255 {
2256 	uint32_t data = 0;
2257 	uint32_t default_data = 0;
2258 
2259 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2260 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2261 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2262 			     enable ? 1 : 0);
2263 	if (default_data != data)
2264 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2265 }
2266 
2267 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2268 						bool enable)
2269 {
2270 	uint32_t data = 0;
2271 	uint32_t default_data = 0;
2272 
2273 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2274 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2275 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2276 			     enable ? 1 : 0);
2277 	if(default_data != data)
2278 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2279 }
2280 
2281 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2282 					bool enable)
2283 {
2284 	uint32_t data = 0;
2285 	uint32_t default_data = 0;
2286 
2287 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2288 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2289 			     CP_PG_DISABLE,
2290 			     enable ? 0 : 1);
2291 	if(default_data != data)
2292 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2293 }
2294 
2295 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2296 						bool enable)
2297 {
2298 	uint32_t data, default_data;
2299 
2300 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2301 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2302 			     GFX_POWER_GATING_ENABLE,
2303 			     enable ? 1 : 0);
2304 	if(default_data != data)
2305 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2306 }
2307 
2308 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2309 						bool enable)
2310 {
2311 	uint32_t data, default_data;
2312 
2313 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2314 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2315 			     GFX_PIPELINE_PG_ENABLE,
2316 			     enable ? 1 : 0);
2317 	if(default_data != data)
2318 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2319 
2320 	if (!enable)
2321 		/* read any GFX register to wake up GFX */
2322 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2323 }
2324 
2325 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2326 						       bool enable)
2327 {
2328 	uint32_t data, default_data;
2329 
2330 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2331 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2332 			     STATIC_PER_CU_PG_ENABLE,
2333 			     enable ? 1 : 0);
2334 	if(default_data != data)
2335 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2336 }
2337 
2338 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2339 						bool enable)
2340 {
2341 	uint32_t data, default_data;
2342 
2343 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2344 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2345 			     DYN_PER_CU_PG_ENABLE,
2346 			     enable ? 1 : 0);
2347 	if(default_data != data)
2348 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2349 }
2350 
2351 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2352 {
2353 	gfx_v9_0_init_csb(adev);
2354 
2355 	/*
2356 	 * Rlc save restore list is workable since v2_1.
2357 	 * And it's needed by gfxoff feature.
2358 	 */
2359 	if (adev->gfx.rlc.is_rlc_v2_1) {
2360 		gfx_v9_1_init_rlc_save_restore_list(adev);
2361 		gfx_v9_0_enable_save_restore_machine(adev);
2362 	}
2363 
2364 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2365 			      AMD_PG_SUPPORT_GFX_SMG |
2366 			      AMD_PG_SUPPORT_GFX_DMG |
2367 			      AMD_PG_SUPPORT_CP |
2368 			      AMD_PG_SUPPORT_GDS |
2369 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2370 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2371 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2372 		gfx_v9_0_init_gfx_power_gating(adev);
2373 	}
2374 }
2375 
2376 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2377 {
2378 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2379 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2380 	gfx_v9_0_wait_for_rlc_serdes(adev);
2381 }
2382 
2383 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2384 {
2385 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2386 	udelay(50);
2387 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2388 	udelay(50);
2389 }
2390 
2391 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2392 {
2393 #ifdef AMDGPU_RLC_DEBUG_RETRY
2394 	u32 rlc_ucode_ver;
2395 #endif
2396 
2397 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2398 	udelay(50);
2399 
2400 	/* carrizo do enable cp interrupt after cp inited */
2401 	if (!(adev->flags & AMD_IS_APU)) {
2402 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2403 		udelay(50);
2404 	}
2405 
2406 #ifdef AMDGPU_RLC_DEBUG_RETRY
2407 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2408 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2409 	if(rlc_ucode_ver == 0x108) {
2410 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2411 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2412 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2413 		 * default is 0x9C4 to create a 100us interval */
2414 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2415 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2416 		 * to disable the page fault retry interrupts, default is
2417 		 * 0x100 (256) */
2418 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2419 	}
2420 #endif
2421 }
2422 
2423 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2424 {
2425 	const struct rlc_firmware_header_v2_0 *hdr;
2426 	const __le32 *fw_data;
2427 	unsigned i, fw_size;
2428 
2429 	if (!adev->gfx.rlc_fw)
2430 		return -EINVAL;
2431 
2432 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2433 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2434 
2435 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2436 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2437 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2438 
2439 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2440 			RLCG_UCODE_LOADING_START_ADDRESS);
2441 	for (i = 0; i < fw_size; i++)
2442 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2443 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2444 
2445 	return 0;
2446 }
2447 
2448 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2449 {
2450 	int r;
2451 
2452 	if (amdgpu_sriov_vf(adev)) {
2453 		gfx_v9_0_init_csb(adev);
2454 		return 0;
2455 	}
2456 
2457 	adev->gfx.rlc.funcs->stop(adev);
2458 
2459 	/* disable CG */
2460 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2461 
2462 	gfx_v9_0_init_pg(adev);
2463 
2464 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2465 		/* legacy rlc firmware loading */
2466 		r = gfx_v9_0_rlc_load_microcode(adev);
2467 		if (r)
2468 			return r;
2469 	}
2470 
2471 	switch (adev->asic_type) {
2472 	case CHIP_RAVEN:
2473 		if (amdgpu_lbpw == 0)
2474 			gfx_v9_0_enable_lbpw(adev, false);
2475 		else
2476 			gfx_v9_0_enable_lbpw(adev, true);
2477 		break;
2478 	case CHIP_VEGA20:
2479 		if (amdgpu_lbpw > 0)
2480 			gfx_v9_0_enable_lbpw(adev, true);
2481 		else
2482 			gfx_v9_0_enable_lbpw(adev, false);
2483 		break;
2484 	default:
2485 		break;
2486 	}
2487 
2488 	adev->gfx.rlc.funcs->start(adev);
2489 
2490 	return 0;
2491 }
2492 
2493 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2494 {
2495 	int i;
2496 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2497 
2498 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2499 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2500 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2501 	if (!enable) {
2502 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2503 			adev->gfx.gfx_ring[i].sched.ready = false;
2504 	}
2505 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2506 	udelay(50);
2507 }
2508 
2509 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2510 {
2511 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2512 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2513 	const struct gfx_firmware_header_v1_0 *me_hdr;
2514 	const __le32 *fw_data;
2515 	unsigned i, fw_size;
2516 
2517 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2518 		return -EINVAL;
2519 
2520 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2521 		adev->gfx.pfp_fw->data;
2522 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2523 		adev->gfx.ce_fw->data;
2524 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2525 		adev->gfx.me_fw->data;
2526 
2527 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2528 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2529 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2530 
2531 	gfx_v9_0_cp_gfx_enable(adev, false);
2532 
2533 	/* PFP */
2534 	fw_data = (const __le32 *)
2535 		(adev->gfx.pfp_fw->data +
2536 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2537 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2538 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2539 	for (i = 0; i < fw_size; i++)
2540 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2541 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2542 
2543 	/* CE */
2544 	fw_data = (const __le32 *)
2545 		(adev->gfx.ce_fw->data +
2546 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2547 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2548 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2549 	for (i = 0; i < fw_size; i++)
2550 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2551 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2552 
2553 	/* ME */
2554 	fw_data = (const __le32 *)
2555 		(adev->gfx.me_fw->data +
2556 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2557 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2558 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2559 	for (i = 0; i < fw_size; i++)
2560 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2561 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2562 
2563 	return 0;
2564 }
2565 
2566 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2567 {
2568 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2569 	const struct cs_section_def *sect = NULL;
2570 	const struct cs_extent_def *ext = NULL;
2571 	int r, i, tmp;
2572 
2573 	/* init the CP */
2574 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2575 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2576 
2577 	gfx_v9_0_cp_gfx_enable(adev, true);
2578 
2579 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2580 	if (r) {
2581 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2582 		return r;
2583 	}
2584 
2585 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2586 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2587 
2588 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2589 	amdgpu_ring_write(ring, 0x80000000);
2590 	amdgpu_ring_write(ring, 0x80000000);
2591 
2592 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2593 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2594 			if (sect->id == SECT_CONTEXT) {
2595 				amdgpu_ring_write(ring,
2596 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2597 					       ext->reg_count));
2598 				amdgpu_ring_write(ring,
2599 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2600 				for (i = 0; i < ext->reg_count; i++)
2601 					amdgpu_ring_write(ring, ext->extent[i]);
2602 			}
2603 		}
2604 	}
2605 
2606 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2607 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2608 
2609 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2610 	amdgpu_ring_write(ring, 0);
2611 
2612 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2613 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2614 	amdgpu_ring_write(ring, 0x8000);
2615 	amdgpu_ring_write(ring, 0x8000);
2616 
2617 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
2618 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2619 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2620 	amdgpu_ring_write(ring, tmp);
2621 	amdgpu_ring_write(ring, 0);
2622 
2623 	amdgpu_ring_commit(ring);
2624 
2625 	return 0;
2626 }
2627 
2628 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2629 {
2630 	struct amdgpu_ring *ring;
2631 	u32 tmp;
2632 	u32 rb_bufsz;
2633 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2634 
2635 	/* Set the write pointer delay */
2636 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2637 
2638 	/* set the RB to use vmid 0 */
2639 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2640 
2641 	/* Set ring buffer size */
2642 	ring = &adev->gfx.gfx_ring[0];
2643 	rb_bufsz = order_base_2(ring->ring_size / 8);
2644 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2645 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2646 #ifdef __BIG_ENDIAN
2647 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2648 #endif
2649 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2650 
2651 	/* Initialize the ring buffer's write pointers */
2652 	ring->wptr = 0;
2653 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2654 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2655 
2656 	/* set the wb address wether it's enabled or not */
2657 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2658 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2659 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2660 
2661 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2662 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2663 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2664 
2665 	mdelay(1);
2666 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2667 
2668 	rb_addr = ring->gpu_addr >> 8;
2669 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2670 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2671 
2672 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2673 	if (ring->use_doorbell) {
2674 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2675 				    DOORBELL_OFFSET, ring->doorbell_index);
2676 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2677 				    DOORBELL_EN, 1);
2678 	} else {
2679 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2680 	}
2681 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2682 
2683 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2684 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
2685 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2686 
2687 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2688 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2689 
2690 
2691 	/* start the ring */
2692 	gfx_v9_0_cp_gfx_start(adev);
2693 	ring->sched.ready = true;
2694 
2695 	return 0;
2696 }
2697 
2698 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2699 {
2700 	int i;
2701 
2702 	if (enable) {
2703 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2704 	} else {
2705 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2706 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2707 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
2708 			adev->gfx.compute_ring[i].sched.ready = false;
2709 		adev->gfx.kiq.ring.sched.ready = false;
2710 	}
2711 	udelay(50);
2712 }
2713 
2714 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2715 {
2716 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2717 	const __le32 *fw_data;
2718 	unsigned i;
2719 	u32 tmp;
2720 
2721 	if (!adev->gfx.mec_fw)
2722 		return -EINVAL;
2723 
2724 	gfx_v9_0_cp_compute_enable(adev, false);
2725 
2726 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2727 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2728 
2729 	fw_data = (const __le32 *)
2730 		(adev->gfx.mec_fw->data +
2731 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2732 	tmp = 0;
2733 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2734 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2735 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2736 
2737 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2738 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2739 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2740 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2741 
2742 	/* MEC1 */
2743 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2744 			 mec_hdr->jt_offset);
2745 	for (i = 0; i < mec_hdr->jt_size; i++)
2746 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2747 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2748 
2749 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2750 			adev->gfx.mec_fw_version);
2751 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2752 
2753 	return 0;
2754 }
2755 
2756 /* KIQ functions */
2757 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2758 {
2759 	uint32_t tmp;
2760 	struct amdgpu_device *adev = ring->adev;
2761 
2762 	/* tell RLC which is KIQ queue */
2763 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2764 	tmp &= 0xffffff00;
2765 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2766 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2767 	tmp |= 0x80;
2768 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2769 }
2770 
2771 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2772 {
2773 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2774 	uint64_t queue_mask = 0;
2775 	int r, i;
2776 
2777 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2778 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2779 			continue;
2780 
2781 		/* This situation may be hit in the future if a new HW
2782 		 * generation exposes more than 64 queues. If so, the
2783 		 * definition of queue_mask needs updating */
2784 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2785 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2786 			break;
2787 		}
2788 
2789 		queue_mask |= (1ull << i);
2790 	}
2791 
2792 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2793 	if (r) {
2794 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2795 		return r;
2796 	}
2797 
2798 	/* set resources */
2799 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2800 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2801 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
2802 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
2803 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
2804 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
2805 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
2806 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
2807 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
2808 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2809 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2810 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2811 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2812 
2813 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2814 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
2815 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2816 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2817 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2818 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2819 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2820 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2821 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2822 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2823 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2824 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2825 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2826 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2827 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2828 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2829 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2830 	}
2831 
2832 	r = amdgpu_ring_test_helper(kiq_ring);
2833 	if (r)
2834 		DRM_ERROR("KCQ enable failed\n");
2835 
2836 	return r;
2837 }
2838 
2839 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2840 {
2841 	struct amdgpu_device *adev = ring->adev;
2842 	struct v9_mqd *mqd = ring->mqd_ptr;
2843 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2844 	uint32_t tmp;
2845 
2846 	mqd->header = 0xC0310800;
2847 	mqd->compute_pipelinestat_enable = 0x00000001;
2848 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2849 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2850 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2851 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2852 	mqd->compute_misc_reserved = 0x00000003;
2853 
2854 	mqd->dynamic_cu_mask_addr_lo =
2855 		lower_32_bits(ring->mqd_gpu_addr
2856 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2857 	mqd->dynamic_cu_mask_addr_hi =
2858 		upper_32_bits(ring->mqd_gpu_addr
2859 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2860 
2861 	eop_base_addr = ring->eop_gpu_addr >> 8;
2862 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2863 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2864 
2865 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2866 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2867 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2868 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2869 
2870 	mqd->cp_hqd_eop_control = tmp;
2871 
2872 	/* enable doorbell? */
2873 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2874 
2875 	if (ring->use_doorbell) {
2876 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2877 				    DOORBELL_OFFSET, ring->doorbell_index);
2878 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2879 				    DOORBELL_EN, 1);
2880 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2881 				    DOORBELL_SOURCE, 0);
2882 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2883 				    DOORBELL_HIT, 0);
2884 	} else {
2885 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2886 					 DOORBELL_EN, 0);
2887 	}
2888 
2889 	mqd->cp_hqd_pq_doorbell_control = tmp;
2890 
2891 	/* disable the queue if it's active */
2892 	ring->wptr = 0;
2893 	mqd->cp_hqd_dequeue_request = 0;
2894 	mqd->cp_hqd_pq_rptr = 0;
2895 	mqd->cp_hqd_pq_wptr_lo = 0;
2896 	mqd->cp_hqd_pq_wptr_hi = 0;
2897 
2898 	/* set the pointer to the MQD */
2899 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2900 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2901 
2902 	/* set MQD vmid to 0 */
2903 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2904 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2905 	mqd->cp_mqd_control = tmp;
2906 
2907 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2908 	hqd_gpu_addr = ring->gpu_addr >> 8;
2909 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2910 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2911 
2912 	/* set up the HQD, this is similar to CP_RB0_CNTL */
2913 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2914 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2915 			    (order_base_2(ring->ring_size / 4) - 1));
2916 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2917 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2918 #ifdef __BIG_ENDIAN
2919 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2920 #endif
2921 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2922 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2923 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2924 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2925 	mqd->cp_hqd_pq_control = tmp;
2926 
2927 	/* set the wb address whether it's enabled or not */
2928 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2929 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2930 	mqd->cp_hqd_pq_rptr_report_addr_hi =
2931 		upper_32_bits(wb_gpu_addr) & 0xffff;
2932 
2933 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2934 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2935 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2936 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2937 
2938 	tmp = 0;
2939 	/* enable the doorbell if requested */
2940 	if (ring->use_doorbell) {
2941 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2942 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2943 				DOORBELL_OFFSET, ring->doorbell_index);
2944 
2945 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2946 					 DOORBELL_EN, 1);
2947 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2948 					 DOORBELL_SOURCE, 0);
2949 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2950 					 DOORBELL_HIT, 0);
2951 	}
2952 
2953 	mqd->cp_hqd_pq_doorbell_control = tmp;
2954 
2955 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2956 	ring->wptr = 0;
2957 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2958 
2959 	/* set the vmid for the queue */
2960 	mqd->cp_hqd_vmid = 0;
2961 
2962 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2963 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2964 	mqd->cp_hqd_persistent_state = tmp;
2965 
2966 	/* set MIN_IB_AVAIL_SIZE */
2967 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2968 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2969 	mqd->cp_hqd_ib_control = tmp;
2970 
2971 	/* activate the queue */
2972 	mqd->cp_hqd_active = 1;
2973 
2974 	return 0;
2975 }
2976 
2977 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2978 {
2979 	struct amdgpu_device *adev = ring->adev;
2980 	struct v9_mqd *mqd = ring->mqd_ptr;
2981 	int j;
2982 
2983 	/* disable wptr polling */
2984 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2985 
2986 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2987 	       mqd->cp_hqd_eop_base_addr_lo);
2988 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2989 	       mqd->cp_hqd_eop_base_addr_hi);
2990 
2991 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2992 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2993 	       mqd->cp_hqd_eop_control);
2994 
2995 	/* enable doorbell? */
2996 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2997 	       mqd->cp_hqd_pq_doorbell_control);
2998 
2999 	/* disable the queue if it's active */
3000 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3001 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3002 		for (j = 0; j < adev->usec_timeout; j++) {
3003 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3004 				break;
3005 			udelay(1);
3006 		}
3007 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3008 		       mqd->cp_hqd_dequeue_request);
3009 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3010 		       mqd->cp_hqd_pq_rptr);
3011 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3012 		       mqd->cp_hqd_pq_wptr_lo);
3013 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3014 		       mqd->cp_hqd_pq_wptr_hi);
3015 	}
3016 
3017 	/* set the pointer to the MQD */
3018 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3019 	       mqd->cp_mqd_base_addr_lo);
3020 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3021 	       mqd->cp_mqd_base_addr_hi);
3022 
3023 	/* set MQD vmid to 0 */
3024 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3025 	       mqd->cp_mqd_control);
3026 
3027 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3028 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3029 	       mqd->cp_hqd_pq_base_lo);
3030 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3031 	       mqd->cp_hqd_pq_base_hi);
3032 
3033 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3034 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3035 	       mqd->cp_hqd_pq_control);
3036 
3037 	/* set the wb address whether it's enabled or not */
3038 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3039 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3040 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3041 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3042 
3043 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3044 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3045 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3046 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3047 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3048 
3049 	/* enable the doorbell if requested */
3050 	if (ring->use_doorbell) {
3051 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3052 					(adev->doorbell_index.kiq * 2) << 2);
3053 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3054 					(adev->doorbell_index.userqueue_end * 2) << 2);
3055 	}
3056 
3057 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3058 	       mqd->cp_hqd_pq_doorbell_control);
3059 
3060 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3061 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3062 	       mqd->cp_hqd_pq_wptr_lo);
3063 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3064 	       mqd->cp_hqd_pq_wptr_hi);
3065 
3066 	/* set the vmid for the queue */
3067 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3068 
3069 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3070 	       mqd->cp_hqd_persistent_state);
3071 
3072 	/* activate the queue */
3073 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3074 	       mqd->cp_hqd_active);
3075 
3076 	if (ring->use_doorbell)
3077 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3078 
3079 	return 0;
3080 }
3081 
3082 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3083 {
3084 	struct amdgpu_device *adev = ring->adev;
3085 	int j;
3086 
3087 	/* disable the queue if it's active */
3088 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3089 
3090 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3091 
3092 		for (j = 0; j < adev->usec_timeout; j++) {
3093 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3094 				break;
3095 			udelay(1);
3096 		}
3097 
3098 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3099 			DRM_DEBUG("KIQ dequeue request failed.\n");
3100 
3101 			/* Manual disable if dequeue request times out */
3102 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3103 		}
3104 
3105 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3106 		      0);
3107 	}
3108 
3109 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3110 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3111 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3112 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3113 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3114 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3115 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3116 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3117 
3118 	return 0;
3119 }
3120 
3121 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3122 {
3123 	struct amdgpu_device *adev = ring->adev;
3124 	struct v9_mqd *mqd = ring->mqd_ptr;
3125 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3126 
3127 	gfx_v9_0_kiq_setting(ring);
3128 
3129 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3130 		/* reset MQD to a clean status */
3131 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3132 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3133 
3134 		/* reset ring buffer */
3135 		ring->wptr = 0;
3136 		amdgpu_ring_clear_ring(ring);
3137 
3138 		mutex_lock(&adev->srbm_mutex);
3139 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3140 		gfx_v9_0_kiq_init_register(ring);
3141 		soc15_grbm_select(adev, 0, 0, 0, 0);
3142 		mutex_unlock(&adev->srbm_mutex);
3143 	} else {
3144 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3145 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3146 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3147 		mutex_lock(&adev->srbm_mutex);
3148 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3149 		gfx_v9_0_mqd_init(ring);
3150 		gfx_v9_0_kiq_init_register(ring);
3151 		soc15_grbm_select(adev, 0, 0, 0, 0);
3152 		mutex_unlock(&adev->srbm_mutex);
3153 
3154 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3155 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3156 	}
3157 
3158 	return 0;
3159 }
3160 
3161 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3162 {
3163 	struct amdgpu_device *adev = ring->adev;
3164 	struct v9_mqd *mqd = ring->mqd_ptr;
3165 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3166 
3167 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3168 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3169 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3170 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3171 		mutex_lock(&adev->srbm_mutex);
3172 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3173 		gfx_v9_0_mqd_init(ring);
3174 		soc15_grbm_select(adev, 0, 0, 0, 0);
3175 		mutex_unlock(&adev->srbm_mutex);
3176 
3177 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3178 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3179 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3180 		/* reset MQD to a clean status */
3181 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3182 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3183 
3184 		/* reset ring buffer */
3185 		ring->wptr = 0;
3186 		amdgpu_ring_clear_ring(ring);
3187 	} else {
3188 		amdgpu_ring_clear_ring(ring);
3189 	}
3190 
3191 	return 0;
3192 }
3193 
3194 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3195 {
3196 	struct amdgpu_ring *ring;
3197 	int r;
3198 
3199 	ring = &adev->gfx.kiq.ring;
3200 
3201 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3202 	if (unlikely(r != 0))
3203 		return r;
3204 
3205 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3206 	if (unlikely(r != 0))
3207 		return r;
3208 
3209 	gfx_v9_0_kiq_init_queue(ring);
3210 	amdgpu_bo_kunmap(ring->mqd_obj);
3211 	ring->mqd_ptr = NULL;
3212 	amdgpu_bo_unreserve(ring->mqd_obj);
3213 	ring->sched.ready = true;
3214 	return 0;
3215 }
3216 
3217 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3218 {
3219 	struct amdgpu_ring *ring = NULL;
3220 	int r = 0, i;
3221 
3222 	gfx_v9_0_cp_compute_enable(adev, true);
3223 
3224 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3225 		ring = &adev->gfx.compute_ring[i];
3226 
3227 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3228 		if (unlikely(r != 0))
3229 			goto done;
3230 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3231 		if (!r) {
3232 			r = gfx_v9_0_kcq_init_queue(ring);
3233 			amdgpu_bo_kunmap(ring->mqd_obj);
3234 			ring->mqd_ptr = NULL;
3235 		}
3236 		amdgpu_bo_unreserve(ring->mqd_obj);
3237 		if (r)
3238 			goto done;
3239 	}
3240 
3241 	r = gfx_v9_0_kiq_kcq_enable(adev);
3242 done:
3243 	return r;
3244 }
3245 
3246 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3247 {
3248 	int r, i;
3249 	struct amdgpu_ring *ring;
3250 
3251 	if (!(adev->flags & AMD_IS_APU))
3252 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3253 
3254 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3255 		/* legacy firmware loading */
3256 		r = gfx_v9_0_cp_gfx_load_microcode(adev);
3257 		if (r)
3258 			return r;
3259 
3260 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3261 		if (r)
3262 			return r;
3263 	}
3264 
3265 	r = gfx_v9_0_kiq_resume(adev);
3266 	if (r)
3267 		return r;
3268 
3269 	r = gfx_v9_0_cp_gfx_resume(adev);
3270 	if (r)
3271 		return r;
3272 
3273 	r = gfx_v9_0_kcq_resume(adev);
3274 	if (r)
3275 		return r;
3276 
3277 	ring = &adev->gfx.gfx_ring[0];
3278 	r = amdgpu_ring_test_helper(ring);
3279 	if (r)
3280 		return r;
3281 
3282 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3283 		ring = &adev->gfx.compute_ring[i];
3284 		amdgpu_ring_test_helper(ring);
3285 	}
3286 
3287 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3288 
3289 	return 0;
3290 }
3291 
3292 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3293 {
3294 	gfx_v9_0_cp_gfx_enable(adev, enable);
3295 	gfx_v9_0_cp_compute_enable(adev, enable);
3296 }
3297 
3298 static int gfx_v9_0_hw_init(void *handle)
3299 {
3300 	int r;
3301 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3302 
3303 	gfx_v9_0_init_golden_registers(adev);
3304 
3305 	gfx_v9_0_constants_init(adev);
3306 
3307 	r = gfx_v9_0_csb_vram_pin(adev);
3308 	if (r)
3309 		return r;
3310 
3311 	r = adev->gfx.rlc.funcs->resume(adev);
3312 	if (r)
3313 		return r;
3314 
3315 	r = gfx_v9_0_cp_resume(adev);
3316 	if (r)
3317 		return r;
3318 
3319 	r = gfx_v9_0_ngg_en(adev);
3320 	if (r)
3321 		return r;
3322 
3323 	return r;
3324 }
3325 
3326 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3327 {
3328 	int r, i;
3329 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3330 
3331 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3332 	if (r)
3333 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3334 
3335 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3336 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3337 
3338 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3339 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3340 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3341 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3342 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3343 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3344 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3345 		amdgpu_ring_write(kiq_ring, 0);
3346 		amdgpu_ring_write(kiq_ring, 0);
3347 		amdgpu_ring_write(kiq_ring, 0);
3348 	}
3349 	r = amdgpu_ring_test_helper(kiq_ring);
3350 	if (r)
3351 		DRM_ERROR("KCQ disable failed\n");
3352 
3353 	return r;
3354 }
3355 
3356 static int gfx_v9_0_hw_fini(void *handle)
3357 {
3358 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3359 
3360 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3361 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3362 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3363 
3364 	/* disable KCQ to avoid CPC touch memory not valid anymore */
3365 	gfx_v9_0_kcq_disable(adev);
3366 
3367 	if (amdgpu_sriov_vf(adev)) {
3368 		gfx_v9_0_cp_gfx_enable(adev, false);
3369 		/* must disable polling for SRIOV when hw finished, otherwise
3370 		 * CPC engine may still keep fetching WB address which is already
3371 		 * invalid after sw finished and trigger DMAR reading error in
3372 		 * hypervisor side.
3373 		 */
3374 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3375 		return 0;
3376 	}
3377 
3378 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3379 	 * otherwise KIQ is hanging when binding back
3380 	 */
3381 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3382 		mutex_lock(&adev->srbm_mutex);
3383 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3384 				adev->gfx.kiq.ring.pipe,
3385 				adev->gfx.kiq.ring.queue, 0);
3386 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3387 		soc15_grbm_select(adev, 0, 0, 0, 0);
3388 		mutex_unlock(&adev->srbm_mutex);
3389 	}
3390 
3391 	gfx_v9_0_cp_enable(adev, false);
3392 	adev->gfx.rlc.funcs->stop(adev);
3393 
3394 	gfx_v9_0_csb_vram_unpin(adev);
3395 
3396 	return 0;
3397 }
3398 
3399 static int gfx_v9_0_suspend(void *handle)
3400 {
3401 	return gfx_v9_0_hw_fini(handle);
3402 }
3403 
3404 static int gfx_v9_0_resume(void *handle)
3405 {
3406 	return gfx_v9_0_hw_init(handle);
3407 }
3408 
3409 static bool gfx_v9_0_is_idle(void *handle)
3410 {
3411 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3412 
3413 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3414 				GRBM_STATUS, GUI_ACTIVE))
3415 		return false;
3416 	else
3417 		return true;
3418 }
3419 
3420 static int gfx_v9_0_wait_for_idle(void *handle)
3421 {
3422 	unsigned i;
3423 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3424 
3425 	for (i = 0; i < adev->usec_timeout; i++) {
3426 		if (gfx_v9_0_is_idle(handle))
3427 			return 0;
3428 		udelay(1);
3429 	}
3430 	return -ETIMEDOUT;
3431 }
3432 
3433 static int gfx_v9_0_soft_reset(void *handle)
3434 {
3435 	u32 grbm_soft_reset = 0;
3436 	u32 tmp;
3437 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3438 
3439 	/* GRBM_STATUS */
3440 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3441 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3442 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3443 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3444 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3445 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3446 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3447 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3448 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3449 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3450 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3451 	}
3452 
3453 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3454 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3455 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3456 	}
3457 
3458 	/* GRBM_STATUS2 */
3459 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3460 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3461 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3462 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3463 
3464 
3465 	if (grbm_soft_reset) {
3466 		/* stop the rlc */
3467 		adev->gfx.rlc.funcs->stop(adev);
3468 
3469 		/* Disable GFX parsing/prefetching */
3470 		gfx_v9_0_cp_gfx_enable(adev, false);
3471 
3472 		/* Disable MEC parsing/prefetching */
3473 		gfx_v9_0_cp_compute_enable(adev, false);
3474 
3475 		if (grbm_soft_reset) {
3476 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3477 			tmp |= grbm_soft_reset;
3478 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3479 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3480 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3481 
3482 			udelay(50);
3483 
3484 			tmp &= ~grbm_soft_reset;
3485 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3486 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3487 		}
3488 
3489 		/* Wait a little for things to settle down */
3490 		udelay(50);
3491 	}
3492 	return 0;
3493 }
3494 
3495 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3496 {
3497 	uint64_t clock;
3498 
3499 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3500 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3501 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3502 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3503 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3504 	return clock;
3505 }
3506 
3507 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3508 					  uint32_t vmid,
3509 					  uint32_t gds_base, uint32_t gds_size,
3510 					  uint32_t gws_base, uint32_t gws_size,
3511 					  uint32_t oa_base, uint32_t oa_size)
3512 {
3513 	struct amdgpu_device *adev = ring->adev;
3514 
3515 	/* GDS Base */
3516 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3517 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3518 				   gds_base);
3519 
3520 	/* GDS Size */
3521 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3522 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3523 				   gds_size);
3524 
3525 	/* GWS */
3526 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3527 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3528 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3529 
3530 	/* OA */
3531 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3532 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3533 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3534 }
3535 
3536 static const u32 vgpr_init_compute_shader[] =
3537 {
3538 	0xb07c0000, 0xbe8000ff,
3539 	0x000000f8, 0xbf110800,
3540 	0x7e000280, 0x7e020280,
3541 	0x7e040280, 0x7e060280,
3542 	0x7e080280, 0x7e0a0280,
3543 	0x7e0c0280, 0x7e0e0280,
3544 	0x80808800, 0xbe803200,
3545 	0xbf84fff5, 0xbf9c0000,
3546 	0xd28c0001, 0x0001007f,
3547 	0xd28d0001, 0x0002027e,
3548 	0x10020288, 0xb8810904,
3549 	0xb7814000, 0xd1196a01,
3550 	0x00000301, 0xbe800087,
3551 	0xbefc00c1, 0xd89c4000,
3552 	0x00020201, 0xd89cc080,
3553 	0x00040401, 0x320202ff,
3554 	0x00000800, 0x80808100,
3555 	0xbf84fff8, 0x7e020280,
3556 	0xbf810000, 0x00000000,
3557 };
3558 
3559 static const u32 sgpr_init_compute_shader[] =
3560 {
3561 	0xb07c0000, 0xbe8000ff,
3562 	0x0000005f, 0xbee50080,
3563 	0xbe812c65, 0xbe822c65,
3564 	0xbe832c65, 0xbe842c65,
3565 	0xbe852c65, 0xb77c0005,
3566 	0x80808500, 0xbf84fff8,
3567 	0xbe800080, 0xbf810000,
3568 };
3569 
3570 static const struct soc15_reg_entry vgpr_init_regs[] = {
3571    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3572    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3573    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3574    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3581 };
3582 
3583 static const struct soc15_reg_entry sgpr_init_regs[] = {
3584    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3585    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3586    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3587    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3588    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3589    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3590    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3591    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3592    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3593    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3594 };
3595 
3596 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3597    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3598    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3599    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3600    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3601    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3602    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3603    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3604    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3605    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3606    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3607    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3608    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3609    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3610    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3611    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3612    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3613    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3614    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3615    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3616    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3617    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3618    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3619    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3620    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3621    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3622    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3623    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3624    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3625    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3626    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3627    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3628    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3629 };
3630 
3631 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3632 {
3633 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3634 	int i, r;
3635 
3636 	r = amdgpu_ring_alloc(ring, 7);
3637 	if (r) {
3638 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3639 			ring->name, r);
3640 		return r;
3641 	}
3642 
3643 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3644 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3645 
3646 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3647 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3648 				PACKET3_DMA_DATA_DST_SEL(1) |
3649 				PACKET3_DMA_DATA_SRC_SEL(2) |
3650 				PACKET3_DMA_DATA_ENGINE(0)));
3651 	amdgpu_ring_write(ring, 0);
3652 	amdgpu_ring_write(ring, 0);
3653 	amdgpu_ring_write(ring, 0);
3654 	amdgpu_ring_write(ring, 0);
3655 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3656 				adev->gds.gds_size);
3657 
3658 	amdgpu_ring_commit(ring);
3659 
3660 	for (i = 0; i < adev->usec_timeout; i++) {
3661 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3662 			break;
3663 		udelay(1);
3664 	}
3665 
3666 	if (i >= adev->usec_timeout)
3667 		r = -ETIMEDOUT;
3668 
3669 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3670 
3671 	return r;
3672 }
3673 
3674 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3675 {
3676 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3677 	struct amdgpu_ib ib;
3678 	struct dma_fence *f = NULL;
3679 	int r, i, j, k;
3680 	unsigned total_size, vgpr_offset, sgpr_offset;
3681 	u64 gpu_addr;
3682 
3683 	/* only support when RAS is enabled */
3684 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3685 		return 0;
3686 
3687 	/* bail if the compute ring is not ready */
3688 	if (!ring->sched.ready)
3689 		return 0;
3690 
3691 	total_size =
3692 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3693 	total_size +=
3694 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3695 	total_size = ALIGN(total_size, 256);
3696 	vgpr_offset = total_size;
3697 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3698 	sgpr_offset = total_size;
3699 	total_size += sizeof(sgpr_init_compute_shader);
3700 
3701 	/* allocate an indirect buffer to put the commands in */
3702 	memset(&ib, 0, sizeof(ib));
3703 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3704 	if (r) {
3705 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3706 		return r;
3707 	}
3708 
3709 	/* load the compute shaders */
3710 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3711 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3712 
3713 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3714 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3715 
3716 	/* init the ib length to 0 */
3717 	ib.length_dw = 0;
3718 
3719 	/* VGPR */
3720 	/* write the register state for the compute dispatch */
3721 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3722 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3723 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3724 								- PACKET3_SET_SH_REG_START;
3725 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3726 	}
3727 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3728 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3729 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3730 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3731 							- PACKET3_SET_SH_REG_START;
3732 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3733 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3734 
3735 	/* write dispatch packet */
3736 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3737 	ib.ptr[ib.length_dw++] = 128; /* x */
3738 	ib.ptr[ib.length_dw++] = 1; /* y */
3739 	ib.ptr[ib.length_dw++] = 1; /* z */
3740 	ib.ptr[ib.length_dw++] =
3741 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3742 
3743 	/* write CS partial flush packet */
3744 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3745 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3746 
3747 	/* SGPR */
3748 	/* write the register state for the compute dispatch */
3749 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3750 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3751 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3752 								- PACKET3_SET_SH_REG_START;
3753 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3754 	}
3755 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3756 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3757 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3758 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3759 							- PACKET3_SET_SH_REG_START;
3760 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3761 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3762 
3763 	/* write dispatch packet */
3764 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3765 	ib.ptr[ib.length_dw++] = 128; /* x */
3766 	ib.ptr[ib.length_dw++] = 1; /* y */
3767 	ib.ptr[ib.length_dw++] = 1; /* z */
3768 	ib.ptr[ib.length_dw++] =
3769 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3770 
3771 	/* write CS partial flush packet */
3772 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3773 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3774 
3775 	/* shedule the ib on the ring */
3776 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3777 	if (r) {
3778 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3779 		goto fail;
3780 	}
3781 
3782 	/* wait for the GPU to finish processing the IB */
3783 	r = dma_fence_wait(f, false);
3784 	if (r) {
3785 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3786 		goto fail;
3787 	}
3788 
3789 	/* read back registers to clear the counters */
3790 	mutex_lock(&adev->grbm_idx_mutex);
3791 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3792 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3793 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3794 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3795 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3796 			}
3797 		}
3798 	}
3799 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3800 	mutex_unlock(&adev->grbm_idx_mutex);
3801 
3802 fail:
3803 	amdgpu_ib_free(adev, &ib, NULL);
3804 	dma_fence_put(f);
3805 
3806 	return r;
3807 }
3808 
3809 static int gfx_v9_0_early_init(void *handle)
3810 {
3811 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3812 
3813 	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3814 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3815 	gfx_v9_0_set_ring_funcs(adev);
3816 	gfx_v9_0_set_irq_funcs(adev);
3817 	gfx_v9_0_set_gds_init(adev);
3818 	gfx_v9_0_set_rlc_funcs(adev);
3819 
3820 	return 0;
3821 }
3822 
3823 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3824 		struct amdgpu_iv_entry *entry);
3825 
3826 static int gfx_v9_0_ecc_late_init(void *handle)
3827 {
3828 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3829 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
3830 	struct ras_ih_if ih_info = {
3831 		.cb = gfx_v9_0_process_ras_data_cb,
3832 	};
3833 	struct ras_fs_if fs_info = {
3834 		.sysfs_name = "gfx_err_count",
3835 		.debugfs_name = "gfx_err_inject",
3836 	};
3837 	struct ras_common_if ras_block = {
3838 		.block = AMDGPU_RAS_BLOCK__GFX,
3839 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3840 		.sub_block_index = 0,
3841 		.name = "gfx",
3842 	};
3843 	int r;
3844 
3845 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3846 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3847 		return 0;
3848 	}
3849 
3850 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
3851 	if (r)
3852 		return r;
3853 
3854 	/* requires IBs so do in late init after IB pool is initialized */
3855 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3856 	if (r)
3857 		return r;
3858 
3859 	/* handle resume path. */
3860 	if (*ras_if) {
3861 		/* resend ras TA enable cmd during resume.
3862 		 * prepare to handle failure.
3863 		 */
3864 		ih_info.head = **ras_if;
3865 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3866 		if (r) {
3867 			if (r == -EAGAIN) {
3868 				/* request a gpu reset. will run again. */
3869 				amdgpu_ras_request_reset_on_boot(adev,
3870 						AMDGPU_RAS_BLOCK__GFX);
3871 				return 0;
3872 			}
3873 			/* fail to enable ras, cleanup all. */
3874 			goto irq;
3875 		}
3876 		/* enable successfully. continue. */
3877 		goto resume;
3878 	}
3879 
3880 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3881 	if (!*ras_if)
3882 		return -ENOMEM;
3883 
3884 	**ras_if = ras_block;
3885 
3886 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3887 	if (r) {
3888 		if (r == -EAGAIN) {
3889 			amdgpu_ras_request_reset_on_boot(adev,
3890 					AMDGPU_RAS_BLOCK__GFX);
3891 			r = 0;
3892 		}
3893 		goto feature;
3894 	}
3895 
3896 	ih_info.head = **ras_if;
3897 	fs_info.head = **ras_if;
3898 
3899 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3900 	if (r)
3901 		goto interrupt;
3902 
3903 	amdgpu_ras_debugfs_create(adev, &fs_info);
3904 
3905 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
3906 	if (r)
3907 		goto sysfs;
3908 resume:
3909 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3910 	if (r)
3911 		goto irq;
3912 
3913 	return 0;
3914 irq:
3915 	amdgpu_ras_sysfs_remove(adev, *ras_if);
3916 sysfs:
3917 	amdgpu_ras_debugfs_remove(adev, *ras_if);
3918 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3919 interrupt:
3920 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
3921 feature:
3922 	kfree(*ras_if);
3923 	*ras_if = NULL;
3924 	return r;
3925 }
3926 
3927 static int gfx_v9_0_late_init(void *handle)
3928 {
3929 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3930 	int r;
3931 
3932 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3933 	if (r)
3934 		return r;
3935 
3936 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3937 	if (r)
3938 		return r;
3939 
3940 	r = gfx_v9_0_ecc_late_init(handle);
3941 	if (r)
3942 		return r;
3943 
3944 	return 0;
3945 }
3946 
3947 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3948 {
3949 	uint32_t rlc_setting;
3950 
3951 	/* if RLC is not enabled, do nothing */
3952 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3953 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3954 		return false;
3955 
3956 	return true;
3957 }
3958 
3959 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3960 {
3961 	uint32_t data;
3962 	unsigned i;
3963 
3964 	data = RLC_SAFE_MODE__CMD_MASK;
3965 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3966 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3967 
3968 	/* wait for RLC_SAFE_MODE */
3969 	for (i = 0; i < adev->usec_timeout; i++) {
3970 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3971 			break;
3972 		udelay(1);
3973 	}
3974 }
3975 
3976 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3977 {
3978 	uint32_t data;
3979 
3980 	data = RLC_SAFE_MODE__CMD_MASK;
3981 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3982 }
3983 
3984 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3985 						bool enable)
3986 {
3987 	amdgpu_gfx_rlc_enter_safe_mode(adev);
3988 
3989 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3990 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3991 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3992 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3993 	} else {
3994 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3995 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3996 	}
3997 
3998 	amdgpu_gfx_rlc_exit_safe_mode(adev);
3999 }
4000 
4001 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4002 						bool enable)
4003 {
4004 	/* TODO: double check if we need to perform under safe mode */
4005 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4006 
4007 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4008 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4009 	else
4010 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4011 
4012 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4013 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4014 	else
4015 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4016 
4017 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4018 }
4019 
4020 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4021 						      bool enable)
4022 {
4023 	uint32_t data, def;
4024 
4025 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4026 
4027 	/* It is disabled by HW by default */
4028 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4029 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4030 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4031 
4032 		if (adev->asic_type != CHIP_VEGA12)
4033 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4034 
4035 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4036 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4037 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4038 
4039 		/* only for Vega10 & Raven1 */
4040 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4041 
4042 		if (def != data)
4043 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4044 
4045 		/* MGLS is a global flag to control all MGLS in GFX */
4046 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4047 			/* 2 - RLC memory Light sleep */
4048 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4049 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4050 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4051 				if (def != data)
4052 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4053 			}
4054 			/* 3 - CP memory Light sleep */
4055 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4056 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4057 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4058 				if (def != data)
4059 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4060 			}
4061 		}
4062 	} else {
4063 		/* 1 - MGCG_OVERRIDE */
4064 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4065 
4066 		if (adev->asic_type != CHIP_VEGA12)
4067 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4068 
4069 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4070 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4071 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4072 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4073 
4074 		if (def != data)
4075 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4076 
4077 		/* 2 - disable MGLS in RLC */
4078 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4079 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4080 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4081 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4082 		}
4083 
4084 		/* 3 - disable MGLS in CP */
4085 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4086 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4087 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4088 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4089 		}
4090 	}
4091 
4092 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4093 }
4094 
4095 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4096 					   bool enable)
4097 {
4098 	uint32_t data, def;
4099 
4100 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4101 
4102 	/* Enable 3D CGCG/CGLS */
4103 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4104 		/* write cmd to clear cgcg/cgls ov */
4105 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4106 		/* unset CGCG override */
4107 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4108 		/* update CGCG and CGLS override bits */
4109 		if (def != data)
4110 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4111 
4112 		/* enable 3Dcgcg FSM(0x0000363f) */
4113 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4114 
4115 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4116 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4117 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4118 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4119 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4120 		if (def != data)
4121 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4122 
4123 		/* set IDLE_POLL_COUNT(0x00900100) */
4124 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4125 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4126 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4127 		if (def != data)
4128 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4129 	} else {
4130 		/* Disable CGCG/CGLS */
4131 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4132 		/* disable cgcg, cgls should be disabled */
4133 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4134 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4135 		/* disable cgcg and cgls in FSM */
4136 		if (def != data)
4137 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4138 	}
4139 
4140 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4141 }
4142 
4143 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4144 						      bool enable)
4145 {
4146 	uint32_t def, data;
4147 
4148 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4149 
4150 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4151 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4152 		/* unset CGCG override */
4153 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4154 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4155 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4156 		else
4157 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4158 		/* update CGCG and CGLS override bits */
4159 		if (def != data)
4160 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4161 
4162 		/* enable cgcg FSM(0x0000363F) */
4163 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4164 
4165 		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4166 			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4167 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4168 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4169 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4170 		if (def != data)
4171 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4172 
4173 		/* set IDLE_POLL_COUNT(0x00900100) */
4174 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4175 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4176 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4177 		if (def != data)
4178 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4179 	} else {
4180 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4181 		/* reset CGCG/CGLS bits */
4182 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4183 		/* disable cgcg and cgls in FSM */
4184 		if (def != data)
4185 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4186 	}
4187 
4188 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4189 }
4190 
4191 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4192 					    bool enable)
4193 {
4194 	if (enable) {
4195 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4196 		 * ===  MGCG + MGLS ===
4197 		 */
4198 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4199 		/* ===  CGCG /CGLS for GFX 3D Only === */
4200 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4201 		/* ===  CGCG + CGLS === */
4202 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4203 	} else {
4204 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4205 		 * ===  CGCG + CGLS ===
4206 		 */
4207 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4208 		/* ===  CGCG /CGLS for GFX 3D Only === */
4209 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4210 		/* ===  MGCG + MGLS === */
4211 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4212 	}
4213 	return 0;
4214 }
4215 
4216 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4217 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4218 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4219 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4220 	.init = gfx_v9_0_rlc_init,
4221 	.get_csb_size = gfx_v9_0_get_csb_size,
4222 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4223 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4224 	.resume = gfx_v9_0_rlc_resume,
4225 	.stop = gfx_v9_0_rlc_stop,
4226 	.reset = gfx_v9_0_rlc_reset,
4227 	.start = gfx_v9_0_rlc_start
4228 };
4229 
4230 static int gfx_v9_0_set_powergating_state(void *handle,
4231 					  enum amd_powergating_state state)
4232 {
4233 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4234 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4235 
4236 	switch (adev->asic_type) {
4237 	case CHIP_RAVEN:
4238 		if (!enable) {
4239 			amdgpu_gfx_off_ctrl(adev, false);
4240 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4241 		}
4242 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4243 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4244 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4245 		} else {
4246 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4247 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4248 		}
4249 
4250 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4251 			gfx_v9_0_enable_cp_power_gating(adev, true);
4252 		else
4253 			gfx_v9_0_enable_cp_power_gating(adev, false);
4254 
4255 		/* update gfx cgpg state */
4256 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4257 
4258 		/* update mgcg state */
4259 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4260 
4261 		if (enable)
4262 			amdgpu_gfx_off_ctrl(adev, true);
4263 		break;
4264 	case CHIP_VEGA12:
4265 		if (!enable) {
4266 			amdgpu_gfx_off_ctrl(adev, false);
4267 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4268 		} else {
4269 			amdgpu_gfx_off_ctrl(adev, true);
4270 		}
4271 		break;
4272 	default:
4273 		break;
4274 	}
4275 
4276 	return 0;
4277 }
4278 
4279 static int gfx_v9_0_set_clockgating_state(void *handle,
4280 					  enum amd_clockgating_state state)
4281 {
4282 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4283 
4284 	if (amdgpu_sriov_vf(adev))
4285 		return 0;
4286 
4287 	switch (adev->asic_type) {
4288 	case CHIP_VEGA10:
4289 	case CHIP_VEGA12:
4290 	case CHIP_VEGA20:
4291 	case CHIP_RAVEN:
4292 		gfx_v9_0_update_gfx_clock_gating(adev,
4293 						 state == AMD_CG_STATE_GATE ? true : false);
4294 		break;
4295 	default:
4296 		break;
4297 	}
4298 	return 0;
4299 }
4300 
4301 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4302 {
4303 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4304 	int data;
4305 
4306 	if (amdgpu_sriov_vf(adev))
4307 		*flags = 0;
4308 
4309 	/* AMD_CG_SUPPORT_GFX_MGCG */
4310 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4311 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4312 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4313 
4314 	/* AMD_CG_SUPPORT_GFX_CGCG */
4315 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4316 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4317 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4318 
4319 	/* AMD_CG_SUPPORT_GFX_CGLS */
4320 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4321 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4322 
4323 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4324 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4325 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4326 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4327 
4328 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4329 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4330 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4331 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4332 
4333 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4334 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4335 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4336 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4337 
4338 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4339 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4340 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4341 }
4342 
4343 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4344 {
4345 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4346 }
4347 
4348 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4349 {
4350 	struct amdgpu_device *adev = ring->adev;
4351 	u64 wptr;
4352 
4353 	/* XXX check if swapping is necessary on BE */
4354 	if (ring->use_doorbell) {
4355 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4356 	} else {
4357 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4358 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4359 	}
4360 
4361 	return wptr;
4362 }
4363 
4364 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4365 {
4366 	struct amdgpu_device *adev = ring->adev;
4367 
4368 	if (ring->use_doorbell) {
4369 		/* XXX check if swapping is necessary on BE */
4370 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4371 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4372 	} else {
4373 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4374 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4375 	}
4376 }
4377 
4378 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4379 {
4380 	struct amdgpu_device *adev = ring->adev;
4381 	u32 ref_and_mask, reg_mem_engine;
4382 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4383 
4384 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4385 		switch (ring->me) {
4386 		case 1:
4387 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4388 			break;
4389 		case 2:
4390 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4391 			break;
4392 		default:
4393 			return;
4394 		}
4395 		reg_mem_engine = 0;
4396 	} else {
4397 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4398 		reg_mem_engine = 1; /* pfp */
4399 	}
4400 
4401 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4402 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4403 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4404 			      ref_and_mask, ref_and_mask, 0x20);
4405 }
4406 
4407 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4408 					struct amdgpu_job *job,
4409 					struct amdgpu_ib *ib,
4410 					uint32_t flags)
4411 {
4412 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4413 	u32 header, control = 0;
4414 
4415 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4416 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4417 	else
4418 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4419 
4420 	control |= ib->length_dw | (vmid << 24);
4421 
4422 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4423 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4424 
4425 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4426 			gfx_v9_0_ring_emit_de_meta(ring);
4427 	}
4428 
4429 	amdgpu_ring_write(ring, header);
4430 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4431 	amdgpu_ring_write(ring,
4432 #ifdef __BIG_ENDIAN
4433 		(2 << 0) |
4434 #endif
4435 		lower_32_bits(ib->gpu_addr));
4436 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4437 	amdgpu_ring_write(ring, control);
4438 }
4439 
4440 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4441 					  struct amdgpu_job *job,
4442 					  struct amdgpu_ib *ib,
4443 					  uint32_t flags)
4444 {
4445 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4446 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4447 
4448 	/* Currently, there is a high possibility to get wave ID mismatch
4449 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4450 	 * different wave IDs than the GDS expects. This situation happens
4451 	 * randomly when at least 5 compute pipes use GDS ordered append.
4452 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4453 	 * Those are probably bugs somewhere else in the kernel driver.
4454 	 *
4455 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4456 	 * GDS to 0 for this ring (me/pipe).
4457 	 */
4458 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4459 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4460 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4461 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4462 	}
4463 
4464 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4465 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4466 	amdgpu_ring_write(ring,
4467 #ifdef __BIG_ENDIAN
4468 				(2 << 0) |
4469 #endif
4470 				lower_32_bits(ib->gpu_addr));
4471 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4472 	amdgpu_ring_write(ring, control);
4473 }
4474 
4475 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4476 				     u64 seq, unsigned flags)
4477 {
4478 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4479 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4480 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4481 
4482 	/* RELEASE_MEM - flush caches, send int */
4483 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4484 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4485 					       EOP_TC_NC_ACTION_EN) :
4486 					      (EOP_TCL1_ACTION_EN |
4487 					       EOP_TC_ACTION_EN |
4488 					       EOP_TC_WB_ACTION_EN |
4489 					       EOP_TC_MD_ACTION_EN)) |
4490 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4491 				 EVENT_INDEX(5)));
4492 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4493 
4494 	/*
4495 	 * the address should be Qword aligned if 64bit write, Dword
4496 	 * aligned if only send 32bit data low (discard data high)
4497 	 */
4498 	if (write64bit)
4499 		BUG_ON(addr & 0x7);
4500 	else
4501 		BUG_ON(addr & 0x3);
4502 	amdgpu_ring_write(ring, lower_32_bits(addr));
4503 	amdgpu_ring_write(ring, upper_32_bits(addr));
4504 	amdgpu_ring_write(ring, lower_32_bits(seq));
4505 	amdgpu_ring_write(ring, upper_32_bits(seq));
4506 	amdgpu_ring_write(ring, 0);
4507 }
4508 
4509 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4510 {
4511 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4512 	uint32_t seq = ring->fence_drv.sync_seq;
4513 	uint64_t addr = ring->fence_drv.gpu_addr;
4514 
4515 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4516 			      lower_32_bits(addr), upper_32_bits(addr),
4517 			      seq, 0xffffffff, 4);
4518 }
4519 
4520 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4521 					unsigned vmid, uint64_t pd_addr)
4522 {
4523 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4524 
4525 	/* compute doesn't have PFP */
4526 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4527 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4528 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4529 		amdgpu_ring_write(ring, 0x0);
4530 	}
4531 }
4532 
4533 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4534 {
4535 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4536 }
4537 
4538 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4539 {
4540 	u64 wptr;
4541 
4542 	/* XXX check if swapping is necessary on BE */
4543 	if (ring->use_doorbell)
4544 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4545 	else
4546 		BUG();
4547 	return wptr;
4548 }
4549 
4550 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4551 					   bool acquire)
4552 {
4553 	struct amdgpu_device *adev = ring->adev;
4554 	int pipe_num, tmp, reg;
4555 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4556 
4557 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4558 
4559 	/* first me only has 2 entries, GFX and HP3D */
4560 	if (ring->me > 0)
4561 		pipe_num -= 2;
4562 
4563 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4564 	tmp = RREG32(reg);
4565 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4566 	WREG32(reg, tmp);
4567 }
4568 
4569 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4570 					    struct amdgpu_ring *ring,
4571 					    bool acquire)
4572 {
4573 	int i, pipe;
4574 	bool reserve;
4575 	struct amdgpu_ring *iring;
4576 
4577 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4578 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4579 	if (acquire)
4580 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4581 	else
4582 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4583 
4584 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4585 		/* Clear all reservations - everyone reacquires all resources */
4586 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4587 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4588 						       true);
4589 
4590 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4591 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4592 						       true);
4593 	} else {
4594 		/* Lower all pipes without a current reservation */
4595 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4596 			iring = &adev->gfx.gfx_ring[i];
4597 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4598 							   iring->me,
4599 							   iring->pipe,
4600 							   0);
4601 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4602 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4603 		}
4604 
4605 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4606 			iring = &adev->gfx.compute_ring[i];
4607 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4608 							   iring->me,
4609 							   iring->pipe,
4610 							   0);
4611 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4612 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4613 		}
4614 	}
4615 
4616 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4617 }
4618 
4619 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4620 				      struct amdgpu_ring *ring,
4621 				      bool acquire)
4622 {
4623 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4624 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4625 
4626 	mutex_lock(&adev->srbm_mutex);
4627 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4628 
4629 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4630 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4631 
4632 	soc15_grbm_select(adev, 0, 0, 0, 0);
4633 	mutex_unlock(&adev->srbm_mutex);
4634 }
4635 
4636 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4637 					       enum drm_sched_priority priority)
4638 {
4639 	struct amdgpu_device *adev = ring->adev;
4640 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4641 
4642 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4643 		return;
4644 
4645 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4646 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4647 }
4648 
4649 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4650 {
4651 	struct amdgpu_device *adev = ring->adev;
4652 
4653 	/* XXX check if swapping is necessary on BE */
4654 	if (ring->use_doorbell) {
4655 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4656 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4657 	} else{
4658 		BUG(); /* only DOORBELL method supported on gfx9 now */
4659 	}
4660 }
4661 
4662 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4663 					 u64 seq, unsigned int flags)
4664 {
4665 	struct amdgpu_device *adev = ring->adev;
4666 
4667 	/* we only allocate 32bit for each seq wb address */
4668 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4669 
4670 	/* write fence seq to the "addr" */
4671 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4672 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4673 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4674 	amdgpu_ring_write(ring, lower_32_bits(addr));
4675 	amdgpu_ring_write(ring, upper_32_bits(addr));
4676 	amdgpu_ring_write(ring, lower_32_bits(seq));
4677 
4678 	if (flags & AMDGPU_FENCE_FLAG_INT) {
4679 		/* set register to trigger INT */
4680 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4681 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4682 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4683 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4684 		amdgpu_ring_write(ring, 0);
4685 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4686 	}
4687 }
4688 
4689 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4690 {
4691 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4692 	amdgpu_ring_write(ring, 0);
4693 }
4694 
4695 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4696 {
4697 	struct v9_ce_ib_state ce_payload = {0};
4698 	uint64_t csa_addr;
4699 	int cnt;
4700 
4701 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4702 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4703 
4704 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4705 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4706 				 WRITE_DATA_DST_SEL(8) |
4707 				 WR_CONFIRM) |
4708 				 WRITE_DATA_CACHE_POLICY(0));
4709 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4710 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4711 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4712 }
4713 
4714 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4715 {
4716 	struct v9_de_ib_state de_payload = {0};
4717 	uint64_t csa_addr, gds_addr;
4718 	int cnt;
4719 
4720 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4721 	gds_addr = csa_addr + 4096;
4722 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4723 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4724 
4725 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4726 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4727 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4728 				 WRITE_DATA_DST_SEL(8) |
4729 				 WR_CONFIRM) |
4730 				 WRITE_DATA_CACHE_POLICY(0));
4731 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4732 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4733 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4734 }
4735 
4736 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4737 {
4738 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4739 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4740 }
4741 
4742 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4743 {
4744 	uint32_t dw2 = 0;
4745 
4746 	if (amdgpu_sriov_vf(ring->adev))
4747 		gfx_v9_0_ring_emit_ce_meta(ring);
4748 
4749 	gfx_v9_0_ring_emit_tmz(ring, true);
4750 
4751 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4752 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4753 		/* set load_global_config & load_global_uconfig */
4754 		dw2 |= 0x8001;
4755 		/* set load_cs_sh_regs */
4756 		dw2 |= 0x01000000;
4757 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
4758 		dw2 |= 0x10002;
4759 
4760 		/* set load_ce_ram if preamble presented */
4761 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4762 			dw2 |= 0x10000000;
4763 	} else {
4764 		/* still load_ce_ram if this is the first time preamble presented
4765 		 * although there is no context switch happens.
4766 		 */
4767 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4768 			dw2 |= 0x10000000;
4769 	}
4770 
4771 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4772 	amdgpu_ring_write(ring, dw2);
4773 	amdgpu_ring_write(ring, 0);
4774 }
4775 
4776 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4777 {
4778 	unsigned ret;
4779 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4780 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4781 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4782 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4783 	ret = ring->wptr & ring->buf_mask;
4784 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4785 	return ret;
4786 }
4787 
4788 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4789 {
4790 	unsigned cur;
4791 	BUG_ON(offset > ring->buf_mask);
4792 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
4793 
4794 	cur = (ring->wptr & ring->buf_mask) - 1;
4795 	if (likely(cur > offset))
4796 		ring->ring[offset] = cur - offset;
4797 	else
4798 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4799 }
4800 
4801 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4802 {
4803 	struct amdgpu_device *adev = ring->adev;
4804 
4805 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4806 	amdgpu_ring_write(ring, 0 |	/* src: register*/
4807 				(5 << 8) |	/* dst: memory */
4808 				(1 << 20));	/* write confirm */
4809 	amdgpu_ring_write(ring, reg);
4810 	amdgpu_ring_write(ring, 0);
4811 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4812 				adev->virt.reg_val_offs * 4));
4813 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4814 				adev->virt.reg_val_offs * 4));
4815 }
4816 
4817 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4818 				    uint32_t val)
4819 {
4820 	uint32_t cmd = 0;
4821 
4822 	switch (ring->funcs->type) {
4823 	case AMDGPU_RING_TYPE_GFX:
4824 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4825 		break;
4826 	case AMDGPU_RING_TYPE_KIQ:
4827 		cmd = (1 << 16); /* no inc addr */
4828 		break;
4829 	default:
4830 		cmd = WR_CONFIRM;
4831 		break;
4832 	}
4833 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4834 	amdgpu_ring_write(ring, cmd);
4835 	amdgpu_ring_write(ring, reg);
4836 	amdgpu_ring_write(ring, 0);
4837 	amdgpu_ring_write(ring, val);
4838 }
4839 
4840 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4841 					uint32_t val, uint32_t mask)
4842 {
4843 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4844 }
4845 
4846 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4847 						  uint32_t reg0, uint32_t reg1,
4848 						  uint32_t ref, uint32_t mask)
4849 {
4850 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4851 	struct amdgpu_device *adev = ring->adev;
4852 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4853 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4854 
4855 	if (fw_version_ok)
4856 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4857 				      ref, mask, 0x20);
4858 	else
4859 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4860 							   ref, mask);
4861 }
4862 
4863 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4864 {
4865 	struct amdgpu_device *adev = ring->adev;
4866 	uint32_t value = 0;
4867 
4868 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4869 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4870 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4871 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4872 	WREG32(mmSQ_CMD, value);
4873 }
4874 
4875 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4876 						 enum amdgpu_interrupt_state state)
4877 {
4878 	switch (state) {
4879 	case AMDGPU_IRQ_STATE_DISABLE:
4880 	case AMDGPU_IRQ_STATE_ENABLE:
4881 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4882 			       TIME_STAMP_INT_ENABLE,
4883 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4884 		break;
4885 	default:
4886 		break;
4887 	}
4888 }
4889 
4890 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4891 						     int me, int pipe,
4892 						     enum amdgpu_interrupt_state state)
4893 {
4894 	u32 mec_int_cntl, mec_int_cntl_reg;
4895 
4896 	/*
4897 	 * amdgpu controls only the first MEC. That's why this function only
4898 	 * handles the setting of interrupts for this specific MEC. All other
4899 	 * pipes' interrupts are set by amdkfd.
4900 	 */
4901 
4902 	if (me == 1) {
4903 		switch (pipe) {
4904 		case 0:
4905 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4906 			break;
4907 		case 1:
4908 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4909 			break;
4910 		case 2:
4911 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4912 			break;
4913 		case 3:
4914 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4915 			break;
4916 		default:
4917 			DRM_DEBUG("invalid pipe %d\n", pipe);
4918 			return;
4919 		}
4920 	} else {
4921 		DRM_DEBUG("invalid me %d\n", me);
4922 		return;
4923 	}
4924 
4925 	switch (state) {
4926 	case AMDGPU_IRQ_STATE_DISABLE:
4927 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4928 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4929 					     TIME_STAMP_INT_ENABLE, 0);
4930 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4931 		break;
4932 	case AMDGPU_IRQ_STATE_ENABLE:
4933 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4934 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4935 					     TIME_STAMP_INT_ENABLE, 1);
4936 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4937 		break;
4938 	default:
4939 		break;
4940 	}
4941 }
4942 
4943 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4944 					     struct amdgpu_irq_src *source,
4945 					     unsigned type,
4946 					     enum amdgpu_interrupt_state state)
4947 {
4948 	switch (state) {
4949 	case AMDGPU_IRQ_STATE_DISABLE:
4950 	case AMDGPU_IRQ_STATE_ENABLE:
4951 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4952 			       PRIV_REG_INT_ENABLE,
4953 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4954 		break;
4955 	default:
4956 		break;
4957 	}
4958 
4959 	return 0;
4960 }
4961 
4962 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4963 					      struct amdgpu_irq_src *source,
4964 					      unsigned type,
4965 					      enum amdgpu_interrupt_state state)
4966 {
4967 	switch (state) {
4968 	case AMDGPU_IRQ_STATE_DISABLE:
4969 	case AMDGPU_IRQ_STATE_ENABLE:
4970 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4971 			       PRIV_INSTR_INT_ENABLE,
4972 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4973 	default:
4974 		break;
4975 	}
4976 
4977 	return 0;
4978 }
4979 
4980 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
4981 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4982 			CP_ECC_ERROR_INT_ENABLE, 1)
4983 
4984 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
4985 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4986 			CP_ECC_ERROR_INT_ENABLE, 0)
4987 
4988 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4989 					      struct amdgpu_irq_src *source,
4990 					      unsigned type,
4991 					      enum amdgpu_interrupt_state state)
4992 {
4993 	switch (state) {
4994 	case AMDGPU_IRQ_STATE_DISABLE:
4995 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4996 				CP_ECC_ERROR_INT_ENABLE, 0);
4997 		DISABLE_ECC_ON_ME_PIPE(1, 0);
4998 		DISABLE_ECC_ON_ME_PIPE(1, 1);
4999 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5000 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5001 		break;
5002 
5003 	case AMDGPU_IRQ_STATE_ENABLE:
5004 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5005 				CP_ECC_ERROR_INT_ENABLE, 1);
5006 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5007 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5008 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5009 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5010 		break;
5011 	default:
5012 		break;
5013 	}
5014 
5015 	return 0;
5016 }
5017 
5018 
5019 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5020 					    struct amdgpu_irq_src *src,
5021 					    unsigned type,
5022 					    enum amdgpu_interrupt_state state)
5023 {
5024 	switch (type) {
5025 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5026 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5027 		break;
5028 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5029 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5030 		break;
5031 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5032 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5033 		break;
5034 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5035 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5036 		break;
5037 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5038 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5039 		break;
5040 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5041 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5042 		break;
5043 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5044 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5045 		break;
5046 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5047 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5048 		break;
5049 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5050 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5051 		break;
5052 	default:
5053 		break;
5054 	}
5055 	return 0;
5056 }
5057 
5058 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5059 			    struct amdgpu_irq_src *source,
5060 			    struct amdgpu_iv_entry *entry)
5061 {
5062 	int i;
5063 	u8 me_id, pipe_id, queue_id;
5064 	struct amdgpu_ring *ring;
5065 
5066 	DRM_DEBUG("IH: CP EOP\n");
5067 	me_id = (entry->ring_id & 0x0c) >> 2;
5068 	pipe_id = (entry->ring_id & 0x03) >> 0;
5069 	queue_id = (entry->ring_id & 0x70) >> 4;
5070 
5071 	switch (me_id) {
5072 	case 0:
5073 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5074 		break;
5075 	case 1:
5076 	case 2:
5077 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5078 			ring = &adev->gfx.compute_ring[i];
5079 			/* Per-queue interrupt is supported for MEC starting from VI.
5080 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5081 			  */
5082 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5083 				amdgpu_fence_process(ring);
5084 		}
5085 		break;
5086 	}
5087 	return 0;
5088 }
5089 
5090 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5091 			   struct amdgpu_iv_entry *entry)
5092 {
5093 	u8 me_id, pipe_id, queue_id;
5094 	struct amdgpu_ring *ring;
5095 	int i;
5096 
5097 	me_id = (entry->ring_id & 0x0c) >> 2;
5098 	pipe_id = (entry->ring_id & 0x03) >> 0;
5099 	queue_id = (entry->ring_id & 0x70) >> 4;
5100 
5101 	switch (me_id) {
5102 	case 0:
5103 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5104 		break;
5105 	case 1:
5106 	case 2:
5107 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5108 			ring = &adev->gfx.compute_ring[i];
5109 			if (ring->me == me_id && ring->pipe == pipe_id &&
5110 			    ring->queue == queue_id)
5111 				drm_sched_fault(&ring->sched);
5112 		}
5113 		break;
5114 	}
5115 }
5116 
5117 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5118 				 struct amdgpu_irq_src *source,
5119 				 struct amdgpu_iv_entry *entry)
5120 {
5121 	DRM_ERROR("Illegal register access in command stream\n");
5122 	gfx_v9_0_fault(adev, entry);
5123 	return 0;
5124 }
5125 
5126 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5127 				  struct amdgpu_irq_src *source,
5128 				  struct amdgpu_iv_entry *entry)
5129 {
5130 	DRM_ERROR("Illegal instruction in command stream\n");
5131 	gfx_v9_0_fault(adev, entry);
5132 	return 0;
5133 }
5134 
5135 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5136 		struct amdgpu_iv_entry *entry)
5137 {
5138 	/* TODO ue will trigger an interrupt. */
5139 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5140 	amdgpu_ras_reset_gpu(adev, 0);
5141 	return AMDGPU_RAS_UE;
5142 }
5143 
5144 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5145 				  struct amdgpu_irq_src *source,
5146 				  struct amdgpu_iv_entry *entry)
5147 {
5148 	struct ras_common_if *ras_if = adev->gfx.ras_if;
5149 	struct ras_dispatch_if ih_data = {
5150 		.entry = entry,
5151 	};
5152 
5153 	if (!ras_if)
5154 		return 0;
5155 
5156 	ih_data.head = *ras_if;
5157 
5158 	DRM_ERROR("CP ECC ERROR IRQ\n");
5159 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5160 	return 0;
5161 }
5162 
5163 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5164 	.name = "gfx_v9_0",
5165 	.early_init = gfx_v9_0_early_init,
5166 	.late_init = gfx_v9_0_late_init,
5167 	.sw_init = gfx_v9_0_sw_init,
5168 	.sw_fini = gfx_v9_0_sw_fini,
5169 	.hw_init = gfx_v9_0_hw_init,
5170 	.hw_fini = gfx_v9_0_hw_fini,
5171 	.suspend = gfx_v9_0_suspend,
5172 	.resume = gfx_v9_0_resume,
5173 	.is_idle = gfx_v9_0_is_idle,
5174 	.wait_for_idle = gfx_v9_0_wait_for_idle,
5175 	.soft_reset = gfx_v9_0_soft_reset,
5176 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
5177 	.set_powergating_state = gfx_v9_0_set_powergating_state,
5178 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
5179 };
5180 
5181 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5182 	.type = AMDGPU_RING_TYPE_GFX,
5183 	.align_mask = 0xff,
5184 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5185 	.support_64bit_ptrs = true,
5186 	.vmhub = AMDGPU_GFXHUB,
5187 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5188 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5189 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5190 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
5191 		5 +  /* COND_EXEC */
5192 		7 +  /* PIPELINE_SYNC */
5193 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5194 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5195 		2 + /* VM_FLUSH */
5196 		8 +  /* FENCE for VM_FLUSH */
5197 		20 + /* GDS switch */
5198 		4 + /* double SWITCH_BUFFER,
5199 		       the first COND_EXEC jump to the place just
5200 			   prior to this double SWITCH_BUFFER  */
5201 		5 + /* COND_EXEC */
5202 		7 +	 /*	HDP_flush */
5203 		4 +	 /*	VGT_flush */
5204 		14 + /*	CE_META */
5205 		31 + /*	DE_META */
5206 		3 + /* CNTX_CTRL */
5207 		5 + /* HDP_INVL */
5208 		8 + 8 + /* FENCE x2 */
5209 		2, /* SWITCH_BUFFER */
5210 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
5211 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5212 	.emit_fence = gfx_v9_0_ring_emit_fence,
5213 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5214 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5215 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5216 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5217 	.test_ring = gfx_v9_0_ring_test_ring,
5218 	.test_ib = gfx_v9_0_ring_test_ib,
5219 	.insert_nop = amdgpu_ring_insert_nop,
5220 	.pad_ib = amdgpu_ring_generic_pad_ib,
5221 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
5222 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5223 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5224 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5225 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
5226 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5227 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5228 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5229 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
5230 };
5231 
5232 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5233 	.type = AMDGPU_RING_TYPE_COMPUTE,
5234 	.align_mask = 0xff,
5235 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5236 	.support_64bit_ptrs = true,
5237 	.vmhub = AMDGPU_GFXHUB,
5238 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5239 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5240 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5241 	.emit_frame_size =
5242 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5243 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5244 		5 + /* hdp invalidate */
5245 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5246 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5247 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5248 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5249 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5250 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5251 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
5252 	.emit_fence = gfx_v9_0_ring_emit_fence,
5253 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5254 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5255 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5256 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5257 	.test_ring = gfx_v9_0_ring_test_ring,
5258 	.test_ib = gfx_v9_0_ring_test_ib,
5259 	.insert_nop = amdgpu_ring_insert_nop,
5260 	.pad_ib = amdgpu_ring_generic_pad_ib,
5261 	.set_priority = gfx_v9_0_ring_set_priority_compute,
5262 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5263 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5264 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5265 };
5266 
5267 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5268 	.type = AMDGPU_RING_TYPE_KIQ,
5269 	.align_mask = 0xff,
5270 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5271 	.support_64bit_ptrs = true,
5272 	.vmhub = AMDGPU_GFXHUB,
5273 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5274 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5275 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5276 	.emit_frame_size =
5277 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5278 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5279 		5 + /* hdp invalidate */
5280 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5281 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5282 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5283 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5284 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5285 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5286 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5287 	.test_ring = gfx_v9_0_ring_test_ring,
5288 	.insert_nop = amdgpu_ring_insert_nop,
5289 	.pad_ib = amdgpu_ring_generic_pad_ib,
5290 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
5291 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5292 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5293 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5294 };
5295 
5296 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5297 {
5298 	int i;
5299 
5300 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5301 
5302 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5303 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5304 
5305 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5306 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5307 }
5308 
5309 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5310 	.set = gfx_v9_0_set_eop_interrupt_state,
5311 	.process = gfx_v9_0_eop_irq,
5312 };
5313 
5314 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5315 	.set = gfx_v9_0_set_priv_reg_fault_state,
5316 	.process = gfx_v9_0_priv_reg_irq,
5317 };
5318 
5319 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5320 	.set = gfx_v9_0_set_priv_inst_fault_state,
5321 	.process = gfx_v9_0_priv_inst_irq,
5322 };
5323 
5324 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5325 	.set = gfx_v9_0_set_cp_ecc_error_state,
5326 	.process = gfx_v9_0_cp_ecc_error_irq,
5327 };
5328 
5329 
5330 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5331 {
5332 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5333 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5334 
5335 	adev->gfx.priv_reg_irq.num_types = 1;
5336 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5337 
5338 	adev->gfx.priv_inst_irq.num_types = 1;
5339 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5340 
5341 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
5342 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5343 }
5344 
5345 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5346 {
5347 	switch (adev->asic_type) {
5348 	case CHIP_VEGA10:
5349 	case CHIP_VEGA12:
5350 	case CHIP_VEGA20:
5351 	case CHIP_RAVEN:
5352 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5353 		break;
5354 	default:
5355 		break;
5356 	}
5357 }
5358 
5359 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5360 {
5361 	/* init asci gds info */
5362 	switch (adev->asic_type) {
5363 	case CHIP_VEGA10:
5364 	case CHIP_VEGA12:
5365 	case CHIP_VEGA20:
5366 		adev->gds.gds_size = 0x10000;
5367 		break;
5368 	case CHIP_RAVEN:
5369 		adev->gds.gds_size = 0x1000;
5370 		break;
5371 	default:
5372 		adev->gds.gds_size = 0x10000;
5373 		break;
5374 	}
5375 
5376 	switch (adev->asic_type) {
5377 	case CHIP_VEGA10:
5378 	case CHIP_VEGA20:
5379 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5380 		break;
5381 	case CHIP_VEGA12:
5382 		adev->gds.gds_compute_max_wave_id = 0x27f;
5383 		break;
5384 	case CHIP_RAVEN:
5385 		if (adev->rev_id >= 0x8)
5386 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5387 		else
5388 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5389 		break;
5390 	default:
5391 		/* this really depends on the chip */
5392 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5393 		break;
5394 	}
5395 
5396 	adev->gds.gws_size = 64;
5397 	adev->gds.oa_size = 16;
5398 }
5399 
5400 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5401 						 u32 bitmap)
5402 {
5403 	u32 data;
5404 
5405 	if (!bitmap)
5406 		return;
5407 
5408 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5409 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5410 
5411 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5412 }
5413 
5414 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5415 {
5416 	u32 data, mask;
5417 
5418 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5419 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5420 
5421 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5422 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5423 
5424 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5425 
5426 	return (~data) & mask;
5427 }
5428 
5429 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5430 				 struct amdgpu_cu_info *cu_info)
5431 {
5432 	int i, j, k, counter, active_cu_number = 0;
5433 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5434 	unsigned disable_masks[4 * 2];
5435 
5436 	if (!adev || !cu_info)
5437 		return -EINVAL;
5438 
5439 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5440 
5441 	mutex_lock(&adev->grbm_idx_mutex);
5442 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5443 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5444 			mask = 1;
5445 			ao_bitmap = 0;
5446 			counter = 0;
5447 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5448 			if (i < 4 && j < 2)
5449 				gfx_v9_0_set_user_cu_inactive_bitmap(
5450 					adev, disable_masks[i * 2 + j]);
5451 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5452 			cu_info->bitmap[i][j] = bitmap;
5453 
5454 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5455 				if (bitmap & mask) {
5456 					if (counter < adev->gfx.config.max_cu_per_sh)
5457 						ao_bitmap |= mask;
5458 					counter ++;
5459 				}
5460 				mask <<= 1;
5461 			}
5462 			active_cu_number += counter;
5463 			if (i < 2 && j < 2)
5464 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5465 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5466 		}
5467 	}
5468 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5469 	mutex_unlock(&adev->grbm_idx_mutex);
5470 
5471 	cu_info->number = active_cu_number;
5472 	cu_info->ao_cu_mask = ao_cu_mask;
5473 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5474 
5475 	return 0;
5476 }
5477 
5478 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5479 {
5480 	.type = AMD_IP_BLOCK_TYPE_GFX,
5481 	.major = 9,
5482 	.minor = 0,
5483 	.rev = 0,
5484 	.funcs = &gfx_v9_0_ip_funcs,
5485 };
5486