/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

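/*
 * PWR_MISC_CNTL_STATUS is not provided by the generated register headers
 * included above, so the offset and the RLC CGPG enable / GFXOFF status
 * fields are defined locally; the GFXOFF status field reports whether the
 * GFX block has actually entered the off state.
 */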
#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

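/*
 * Firmware images this IP block may request at runtime. MODULE_FIRMWARE()
 * records each name in the module info section so that userspace tooling
 * (e.g. initramfs generators) can bundle the files ahead of time.
 */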
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

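/*
 * "Golden" register settings: per-ASIC masked register writes applied once
 * at init to bring the GC block to validated defaults. Each entry is
 * (hwip, instance, register, and_mask, or_mask); roughly, the bits selected
 * by and_mask are replaced with or_mask, and a full 0xffffffff mask turns
 * into a straight register write (see soc15_program_register_sequence()).
 */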
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);

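/*
 * Apply the golden settings for the detected ASIC, then the settings common
 * to all GFX9 parts. On VEGA10 under SR-IOV this can be skipped when the
 * hypervisor is expected to have programmed the registers already.
 */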
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (!amdgpu_virt_support_skip_setting(adev)) {
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0,
							ARRAY_SIZE(golden_settings_gc_9_0));
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0_vg10,
							ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		}
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

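/*
 * Emit a PACKET3_WRITE_DATA that writes @val to register @reg through the
 * selected CP engine, optionally requesting a write confirmation (@wc).
 */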
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

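/*
 * Emit a PACKET3_WAIT_REG_MEM: stall the selected engine until
 * (*addr & mask) == ref, where @mem_space chooses between a register
 * offset and a (dword-aligned) memory address; @inv is the poll interval.
 */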
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

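/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD from the
 * CPU, ask the CP to overwrite it with 0xDEADBEEF via SET_UCONFIG_REG, and
 * poll (up to adev->usec_timeout) for the new value to land.
 */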
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

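/*
 * Indirect buffer test: build a tiny IB whose WRITE_DATA packet stores
 * 0xDEADBEEF to a writeback slot, schedule it, wait on the fence, and then
 * verify the value arrived in memory.
 */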
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

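/*
 * Parse the v2.1 RLC firmware header: record versions, sizes and pointers
 * for the three save/restore list blobs (CNTL, GPM and SRM memory) that
 * are loaded alongside the main RLC ucode.
 */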
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

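/*
 * Record whether the installed CP firmware is new enough to support the
 * combined write-and-wait register operation; callers use these flags
 * (presumably when emitting VM flushes) to avoid a separate WRITE/WAIT
 * packet pair on capable firmware.
 */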
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

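/*
 * GFXOFF is only trusted on Raven with sufficiently new RLC firmware;
 * revoke the PP_GFXOFF_MASK feature on combinations known to be unstable,
 * and advertise the GFX powergating flags when GFXOFF stays enabled.
 */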
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
		    ((adev->gfx.rlc_fw_version != 106 &&
		      adev->gfx.rlc_fw_version < 531) ||
		     (adev->gfx.rlc_fw_version == 53815) ||
		     (adev->gfx.rlc_feature_version < 1) ||
		     !adev->gfx.rlc.is_rlc_v2_1))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	default:
		break;
	}
}

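/*
 * Fetch and validate all GFX9 microcode (PFP, ME, CE, RLC, MEC and the
 * optional MEC2), stash version info from the headers, and, for PSP-based
 * loading, fill out adev->firmware.ucode[] so the PSP can upload each image.
 */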
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin. The two are told apart by PCI revision:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF,
	 *          or revision >= 0xD8 && revision <= 0xDF;
	 * otherwise it is PCO FP5.
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APUs, so the SMU version can be
		 * queried directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		/* MEC2 firmware is optional; fall back to a single MEC */
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}
	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

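/*
 * Size of the clear-state buffer in dwords: PREAMBLE begin (2) + context
 * control (3) + one SET_CONTEXT_REG header pair plus payload per extent +
 * PREAMBLE end (2) + CLEAR_STATE (2), matching the layout written by
 * gfx_v9_0_get_csb_buffer() below.
 */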
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

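/*
 * Build the per-SE/SH "always on" CU masks: the first N CUs found in each
 * shader array are kept powered (N depends on the ASIC), and the first two
 * of those are additionally reported to the RLC as always-on for
 * powergating purposes.
 */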
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (adev->asic_type == CHIP_VEGA12)
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (cu_info->bitmap[i][j] & mask) {
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

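/*
 * Program the RLC load-balancing-per-watt (LBPW) thresholds and counters
 * for Raven; gfx_v9_4_init_lbpw() below does the same for Vega20 with
 * slightly different tuning values.
 */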
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
	 * but used as part of the RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
	 * but used as part of the RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return 5;
}

static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->asic_type == CHIP_RAVEN) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		gfx_v9_0_init_lbpw(adev);
		break;
	case CHIP_VEGA20:
		gfx_v9_4_init_lbpw(adev);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}

static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

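/*
 * Set up the MEC (compute microengine): claim this driver's share of the
 * compute queues, allocate the HPD EOP buffer in VRAM, and copy the MEC
 * ucode into a GTT buffer object for the CP to fetch.
 */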
static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}

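/*
 * Debug helpers: read wave state through the SQ indirect register
 * interface by programming SQ_IND_INDEX (wave/SIMD/address, with
 * force-read and optionally auto-increment) and reading SQ_IND_DATA.
 */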
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q, u32 vm)
{
	soc15_grbm_select(adev, me, pipe, q, vm);
}

static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};

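/*
 * Early per-ASIC configuration: pick FIFO sizes and the GB_ADDR_CONFIG
 * golden value for the chip, then decode that register's fields (pipes,
 * banks, RBs, shader engines, interleave size) into gfx.config.
 */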
1334 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1335 {
1336 	u32 gb_addr_config;
1337 	int err;
1338 
1339 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1340 
1341 	switch (adev->asic_type) {
1342 	case CHIP_VEGA10:
1343 		adev->gfx.config.max_hw_contexts = 8;
1344 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1345 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1346 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1347 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1348 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1349 		break;
1350 	case CHIP_VEGA12:
1351 		adev->gfx.config.max_hw_contexts = 8;
1352 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1353 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1354 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1355 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1356 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1357 		DRM_INFO("fix gfx.config for vega12\n");
1358 		break;
1359 	case CHIP_VEGA20:
1360 		adev->gfx.config.max_hw_contexts = 8;
1361 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1362 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1363 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1364 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1365 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1366 		gb_addr_config &= ~0xf3e777ff;
1367 		gb_addr_config |= 0x22014042;
1368 		/* check vbios table if gpu info is not available */
1369 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1370 		if (err)
1371 			return err;
1372 		break;
1373 	case CHIP_RAVEN:
1374 		adev->gfx.config.max_hw_contexts = 8;
1375 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1376 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1377 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1378 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1379 		if (adev->rev_id >= 8)
1380 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1381 		else
1382 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1383 		break;
1384 	default:
1385 		BUG();
1386 		break;
1387 	}
1388 
1389 	adev->gfx.config.gb_addr_config = gb_addr_config;
1390 
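	/*
	 * The GB_ADDR_CONFIG fields below are log2-encoded, so each one is
	 * expanded with "1 << field"; PIPE_INTERLEAVE_SIZE additionally
	 * starts from a 256-byte (1 << 8) base.
	 */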
1391 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1392 			REG_GET_FIELD(
1393 					adev->gfx.config.gb_addr_config,
1394 					GB_ADDR_CONFIG,
1395 					NUM_PIPES);
1396 
1397 	adev->gfx.config.max_tile_pipes =
1398 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1399 
1400 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1401 			REG_GET_FIELD(
1402 					adev->gfx.config.gb_addr_config,
1403 					GB_ADDR_CONFIG,
1404 					NUM_BANKS);
1405 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1406 			REG_GET_FIELD(
1407 					adev->gfx.config.gb_addr_config,
1408 					GB_ADDR_CONFIG,
1409 					MAX_COMPRESSED_FRAGS);
1410 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1411 			REG_GET_FIELD(
1412 					adev->gfx.config.gb_addr_config,
1413 					GB_ADDR_CONFIG,
1414 					NUM_RB_PER_SE);
1415 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1416 			REG_GET_FIELD(
1417 					adev->gfx.config.gb_addr_config,
1418 					GB_ADDR_CONFIG,
1419 					NUM_SHADER_ENGINES);
1420 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1421 			REG_GET_FIELD(
1422 					adev->gfx.config.gb_addr_config,
1423 					GB_ADDR_CONFIG,
1424 					PIPE_INTERLEAVE_SIZE));
1425 
1426 	return 0;
1427 }
1428 
1429 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1430 				   struct amdgpu_ngg_buf *ngg_buf,
1431 				   int size_se,
1432 				   int default_size_se)
1433 {
1434 	int r;
1435 
1436 	if (size_se < 0) {
1437 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1438 		return -EINVAL;
1439 	}
1440 	size_se = size_se ? size_se : default_size_se;
1441 
1442 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1443 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1444 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1445 				    &ngg_buf->bo,
1446 				    &ngg_buf->gpu_addr,
1447 				    NULL);
1448 	if (r) {
1449 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1450 		return r;
1451 	}
1452 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1453 
1454 	return r;
1455 }
1456 
1457 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1458 {
1459 	int i;
1460 
1461 	for (i = 0; i < NGG_BUF_MAX; i++)
1462 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1463 				      &adev->gfx.ngg.buf[i].gpu_addr,
1464 				      NULL);
1465 
1466 	memset(&adev->gfx.ngg.buf[0], 0,
1467 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1468 
1469 	adev->gfx.ngg.init = false;
1470 
1471 	return 0;
1472 }
1473 
1474 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1475 {
1476 	int r;
1477 
1478 	if (!amdgpu_ngg || adev->gfx.ngg.init)
1479 		return 0;
1480 
1481 	/* reserve GDS memory: 64-byte alignment */
1482 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1483 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1484 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1485 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
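	/* the reserved window therefore begins right past the end of the VMID0 GDS range */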
1486 
1487 	/* Primitive Buffer */
1488 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1489 				    amdgpu_prim_buf_per_se,
1490 				    64 * 1024);
1491 	if (r) {
1492 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1493 		goto err;
1494 	}
1495 
1496 	/* Position Buffer */
1497 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1498 				    amdgpu_pos_buf_per_se,
1499 				    256 * 1024);
1500 	if (r) {
1501 		dev_err(adev->dev, "Failed to create Position Buffer\n");
1502 		goto err;
1503 	}
1504 
1505 	/* Control Sideband */
1506 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1507 				    amdgpu_cntl_sb_buf_per_se,
1508 				    256);
1509 	if (r) {
1510 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1511 		goto err;
1512 	}
1513 
1514 	/* Parameter Cache, not created by default */
1515 	if (amdgpu_param_buf_per_se <= 0)
1516 		goto out;
1517 
1518 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1519 				    amdgpu_param_buf_per_se,
1520 				    512 * 1024);
1521 	if (r) {
1522 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
1523 		goto err;
1524 	}
1525 
1526 out:
1527 	adev->gfx.ngg.init = true;
1528 	return 0;
1529 err:
1530 	gfx_v9_0_ngg_fini(adev);
1531 	return r;
1532 }
1533 
1534 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1535 {
1536 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1537 	int r;
1538 	u32 data, base;
1539 
1540 	if (!amdgpu_ngg)
1541 		return 0;
1542 
1543 	/* Program buffer size */
1544 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1545 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1546 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1547 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
1548 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1549 
1550 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1551 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1552 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1553 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1554 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
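	/* buffer sizes appear to be programmed in 256-byte units (>> 8), the parameter cache in 1 KB units (>> 10) */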
1555 
1556 	/* Program buffer base address */
1557 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1558 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1559 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1560 
1561 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1562 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1563 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1564 
1565 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1566 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1567 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1568 
1569 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1570 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1571 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1572 
1573 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1574 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1575 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1576 
1577 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1578 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1579 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1580 
1581 	/* Clear GDS reserved memory */
1582 	r = amdgpu_ring_alloc(ring, 17);
1583 	if (r) {
1584 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1585 			  ring->name, r);
1586 		return r;
1587 	}
1588 
1589 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1590 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1591 			           (adev->gds.gds_size +
1592 				    adev->gfx.ngg.gds_reserve_size));
1593 
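	/* DST_SEL(1) selects GDS and SRC_SEL(2) inline packet data, so this zero-fills the reserved GDS window */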
1594 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1595 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1596 				PACKET3_DMA_DATA_DST_SEL(1) |
1597 				PACKET3_DMA_DATA_SRC_SEL(2)));
1598 	amdgpu_ring_write(ring, 0);
1599 	amdgpu_ring_write(ring, 0);
1600 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1601 	amdgpu_ring_write(ring, 0);
1602 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1603 				adev->gfx.ngg.gds_reserve_size);
1604 
1605 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1606 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1607 
1608 	amdgpu_ring_commit(ring);
1609 
1610 	return 0;
1611 }
1612 
1613 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1614 				      int mec, int pipe, int queue)
1615 {
1616 	int r;
1617 	unsigned irq_type;
1618 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1621 
1622 	/* mec0 is me1 */
1623 	ring->me = mec + 1;
1624 	ring->pipe = pipe;
1625 	ring->queue = queue;
1626 
1627 	ring->ring_obj = NULL;
1628 	ring->use_doorbell = true;
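	/* doorbell_index counts 64-bit doorbells; the shift converts it to the dword-based index used here */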
1629 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1630 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1631 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1632 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1633 
1634 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1635 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1636 		+ ring->pipe;
1637 
1638 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1639 	r = amdgpu_ring_init(adev, ring, 1024,
1640 			     &adev->gfx.eop_irq, irq_type);
1641 	if (r)
1642 		return r;
1643 
1645 	return 0;
1646 }
1647 
1648 static int gfx_v9_0_sw_init(void *handle)
1649 {
1650 	int i, j, k, r, ring_id;
1651 	struct amdgpu_ring *ring;
1652 	struct amdgpu_kiq *kiq;
1653 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1654 
1655 	switch (adev->asic_type) {
1656 	case CHIP_VEGA10:
1657 	case CHIP_VEGA12:
1658 	case CHIP_VEGA20:
1659 	case CHIP_RAVEN:
1660 		adev->gfx.mec.num_mec = 2;
1661 		break;
1662 	default:
1663 		adev->gfx.mec.num_mec = 1;
1664 		break;
1665 	}
1666 
1667 	adev->gfx.mec.num_pipe_per_mec = 4;
1668 	adev->gfx.mec.num_queue_per_pipe = 8;
1669 
1670 	/* EOP Event */
1671 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1672 	if (r)
1673 		return r;
1674 
1675 	/* Privileged reg */
1676 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1677 			      &adev->gfx.priv_reg_irq);
1678 	if (r)
1679 		return r;
1680 
1681 	/* Privileged inst */
1682 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1683 			      &adev->gfx.priv_inst_irq);
1684 	if (r)
1685 		return r;
1686 
1687 	/* ECC error */
1688 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1689 			      &adev->gfx.cp_ecc_error_irq);
1690 	if (r)
1691 		return r;
1692 
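	/* FUE errors share the cp_ecc_error_irq handler registered for CP ECC errors above */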
1693 	/* FUE error */
1694 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1695 			      &adev->gfx.cp_ecc_error_irq);
1696 	if (r)
1697 		return r;
1698 
1699 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1700 
1701 	gfx_v9_0_scratch_init(adev);
1702 
1703 	r = gfx_v9_0_init_microcode(adev);
1704 	if (r) {
1705 		DRM_ERROR("Failed to load gfx firmware!\n");
1706 		return r;
1707 	}
1708 
1709 	r = adev->gfx.rlc.funcs->init(adev);
1710 	if (r) {
1711 		DRM_ERROR("Failed to init rlc BOs!\n");
1712 		return r;
1713 	}
1714 
1715 	r = gfx_v9_0_mec_init(adev);
1716 	if (r) {
1717 		DRM_ERROR("Failed to init MEC BOs!\n");
1718 		return r;
1719 	}
1720 
1721 	/* set up the gfx ring */
1722 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1723 		ring = &adev->gfx.gfx_ring[i];
1724 		ring->ring_obj = NULL;
1725 		if (!i)
1726 			sprintf(ring->name, "gfx");
1727 		else
1728 			sprintf(ring->name, "gfx_%d", i);
1729 		ring->use_doorbell = true;
1730 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1731 		r = amdgpu_ring_init(adev, ring, 1024,
1732 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1733 		if (r)
1734 			return r;
1735 	}
1736 
1737 	/* set up the compute queues - allocate horizontally across pipes */
1738 	ring_id = 0;
1739 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1740 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1741 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1742 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1743 					continue;
1744 
1745 				r = gfx_v9_0_compute_ring_init(adev,
1746 							       ring_id,
1747 							       i, k, j);
1748 				if (r)
1749 					return r;
1750 
1751 				ring_id++;
1752 			}
1753 		}
1754 	}
1755 
1756 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1757 	if (r) {
1758 		DRM_ERROR("Failed to init KIQ BOs!\n");
1759 		return r;
1760 	}
1761 
1762 	kiq = &adev->gfx.kiq;
1763 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1764 	if (r)
1765 		return r;
1766 
1767 	/* create MQD for all compute queues as well as KIQ for the SRIOV case */
1768 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1769 	if (r)
1770 		return r;
1771 
1772 	adev->gfx.ce_ram_size = 0x8000;
1773 
1774 	r = gfx_v9_0_gpu_early_init(adev);
1775 	if (r)
1776 		return r;
1777 
1778 	r = gfx_v9_0_ngg_init(adev);
1779 	if (r)
1780 		return r;
1781 
1782 	return 0;
1783 }
1784 
1786 static int gfx_v9_0_sw_fini(void *handle)
1787 {
1788 	int i;
1789 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1790 
1791 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1792 			adev->gfx.ras_if) {
1793 		struct ras_common_if *ras_if = adev->gfx.ras_if;
1794 		struct ras_ih_if ih_info = {
1795 			.head = *ras_if,
1796 		};
1797 
1798 		amdgpu_ras_debugfs_remove(adev, ras_if);
1799 		amdgpu_ras_sysfs_remove(adev, ras_if);
1800 		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1801 		amdgpu_ras_feature_enable(adev, ras_if, 0);
1802 		kfree(ras_if);
1803 	}
1804 
1805 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1806 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1807 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1808 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1809 
1810 	amdgpu_gfx_mqd_sw_fini(adev);
1811 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1812 	amdgpu_gfx_kiq_fini(adev);
1813 
1814 	gfx_v9_0_mec_fini(adev);
1815 	gfx_v9_0_ngg_fini(adev);
1816 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1817 	if (adev->asic_type == CHIP_RAVEN) {
1818 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1819 				&adev->gfx.rlc.cp_table_gpu_addr,
1820 				(void **)&adev->gfx.rlc.cp_table_ptr);
1821 	}
1822 	gfx_v9_0_free_microcode(adev);
1823 
1824 	return 0;
1825 }
1826 
1828 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1829 {
1830 	/* TODO */
1831 }
1832 
1833 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1834 {
1835 	u32 data;
1836 
1837 	if (instance == 0xffffffff)
1838 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1839 	else
1840 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1841 
1842 	if (se_num == 0xffffffff)
1843 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1844 	else
1845 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1846 
1847 	if (sh_num == 0xffffffff)
1848 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1849 	else
1850 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1851 
1852 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1853 }
1854 
1855 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1856 {
1857 	u32 data, mask;
1858 
1859 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1860 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1861 
1862 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1863 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1864 
1865 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1866 					 adev->gfx.config.max_sh_per_se);
1867 
1868 	return (~data) & mask;
1869 }
1870 
1871 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1872 {
1873 	int i, j;
1874 	u32 data;
1875 	u32 active_rbs = 0;
1876 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1877 					adev->gfx.config.max_sh_per_se;
1878 
1879 	mutex_lock(&adev->grbm_idx_mutex);
1880 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1881 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1882 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1883 			data = gfx_v9_0_get_rb_active_bitmap(adev);
1884 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1885 					       rb_bitmap_width_per_sh);
1886 		}
1887 	}
1888 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1889 	mutex_unlock(&adev->grbm_idx_mutex);
1890 
1891 	adev->gfx.config.backend_enable_mask = active_rbs;
1892 	adev->gfx.config.num_rbs = hweight32(active_rbs);
1893 }
1894 
1895 #define DEFAULT_SH_MEM_BASES	(0x6000)
1896 #define FIRST_COMPUTE_VMID	(8)
1897 #define LAST_COMPUTE_VMID	(16)
1898 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1899 {
1900 	int i;
1901 	uint32_t sh_mem_config;
1902 	uint32_t sh_mem_bases;
1903 
1904 	/*
1905 	 * Configure apertures:
1906 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1907 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1908 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1909 	 */
1910 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
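	/* SH_MEM_BASES holds the shared and private aperture bases in one dword, hence the duplicated value */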
1911 
1912 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1913 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1914 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1915 
1916 	mutex_lock(&adev->srbm_mutex);
1917 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1918 		soc15_grbm_select(adev, 0, 0, 0, i);
1919 		/* CP and shaders */
1920 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1921 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1922 	}
1923 	soc15_grbm_select(adev, 0, 0, 0, 0);
1924 	mutex_unlock(&adev->srbm_mutex);
1925 
1926 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
1927 	   access. These should be enabled by FW for target VMIDs. */
1928 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1929 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
1930 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
1931 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
1932 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
1933 	}
1934 }
1935 
1936 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1937 {
1938 	u32 tmp;
1939 	int i;
1940 
1941 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1942 
1943 	gfx_v9_0_tiling_mode_table_init(adev);
1944 
1945 	gfx_v9_0_setup_rb(adev);
1946 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1947 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1948 
1949 	/* XXX SH_MEM regs */
1950 	/* where to put LDS, scratch, GPUVM in FSA64 space */
1951 	mutex_lock(&adev->srbm_mutex);
1952 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1953 		soc15_grbm_select(adev, 0, 0, 0, i);
1954 		/* CP and shaders */
1955 		if (i == 0) {
1956 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1957 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1958 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1959 					    !!amdgpu_noretry);
1960 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1961 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1962 		} else {
1963 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1964 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1965 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1966 					    !!amdgpu_noretry);
1967 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1968 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1969 				(adev->gmc.private_aperture_start >> 48));
1970 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1971 				(adev->gmc.shared_aperture_start >> 48));
1972 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1973 		}
1974 	}
1975 	soc15_grbm_select(adev, 0, 0, 0, 0);
1976 
1977 	mutex_unlock(&adev->srbm_mutex);
1978 
1979 	gfx_v9_0_init_compute_vmid(adev);
1980 }
1981 
1982 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1983 {
1984 	u32 i, j, k;
1985 	u32 mask;
1986 
1987 	mutex_lock(&adev->grbm_idx_mutex);
1988 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1989 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1990 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1991 			for (k = 0; k < adev->usec_timeout; k++) {
1992 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1993 					break;
1994 				udelay(1);
1995 			}
1996 			if (k == adev->usec_timeout) {
1997 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
1998 						      0xffffffff, 0xffffffff);
1999 				mutex_unlock(&adev->grbm_idx_mutex);
2000 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2001 					 i, j);
2002 				return;
2003 			}
2004 		}
2005 	}
2006 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2007 	mutex_unlock(&adev->grbm_idx_mutex);
2008 
2009 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2010 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2011 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2012 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2013 	for (k = 0; k < adev->usec_timeout; k++) {
2014 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2015 			break;
2016 		udelay(1);
2017 	}
2018 }
2019 
2020 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2021 					       bool enable)
2022 {
2023 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2024 
2025 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2026 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2027 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2028 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2029 
2030 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2031 }
2032 
2033 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2034 {
2035 	/* csib */
2036 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2037 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2038 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2039 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2040 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2041 			adev->gfx.rlc.clear_state_size);
2042 }
2043 
2044 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2045 				int indirect_offset,
2046 				int list_size,
2047 				int *unique_indirect_regs,
2048 				int unique_indirect_reg_count,
2049 				int *indirect_start_offsets,
2050 				int *indirect_start_offsets_count,
2051 				int max_start_offsets_count)
2052 {
2053 	int idx;
2054 
2055 	for (; indirect_offset < list_size; indirect_offset++) {
2056 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2057 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2058 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2059 
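		/* each entry spans three dwords with the indirect register in the third slot; a 0xFFFFFFFF dword terminates the block */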
2060 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2061 			indirect_offset += 2;
2062 
2063 			/* look for a matching index */
2064 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2065 				if (unique_indirect_regs[idx] ==
2066 					register_list_format[indirect_offset] ||
2067 					!unique_indirect_regs[idx])
2068 					break;
2069 			}
2070 
2071 			BUG_ON(idx >= unique_indirect_reg_count);
2072 
2073 			if (!unique_indirect_regs[idx])
2074 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2075 
2076 			indirect_offset++;
2077 		}
2078 	}
2079 }
2080 
2081 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2082 {
2083 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2084 	int unique_indirect_reg_count = 0;
2085 
2086 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2087 	int indirect_start_offsets_count = 0;
2088 
2089 	int list_size = 0;
2090 	int i = 0, j = 0;
2091 	u32 tmp = 0;
2092 
2093 	u32 *register_list_format =
2094 		kmemdup(adev->gfx.rlc.register_list_format,
2095 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2096 	if (!register_list_format)
2097 		return -ENOMEM;
2098 
2099 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2100 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2101 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2102 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2103 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2104 				    unique_indirect_regs,
2105 				    unique_indirect_reg_count,
2106 				    indirect_start_offsets,
2107 				    &indirect_start_offsets_count,
2108 				    ARRAY_SIZE(indirect_start_offsets));
2109 
2110 	/* enable auto inc in case it is disabled */
2111 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2112 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2113 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2114 
2115 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2116 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2117 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2118 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2119 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2120 			adev->gfx.rlc.register_restore[i]);
2121 
2122 	/* load indirect register */
2123 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2124 		adev->gfx.rlc.reg_list_format_start);
2125 
2126 	/* direct register portion */
2127 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2128 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2129 			register_list_format[i]);
2130 
2131 	/* indirect register portion */
2132 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2133 		if (register_list_format[i] == 0xFFFFFFFF) {
2134 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2135 			continue;
2136 		}
2137 
2138 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2139 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2140 
2141 		for (j = 0; j < unique_indirect_reg_count; j++) {
2142 			if (register_list_format[i] == unique_indirect_regs[j]) {
2143 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2144 				break;
2145 			}
2146 		}
2147 
2148 		BUG_ON(j >= unique_indirect_reg_count);
2149 
2150 		i++;
2151 	}
2152 
2153 	/* set save/restore list size */
2154 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2155 	list_size = list_size >> 1;
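	/* the list apparently stores (register, value) pairs, hence halving the dword count */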
2156 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2157 		adev->gfx.rlc.reg_restore_list_size);
2158 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2159 
2160 	/* write the starting offsets to RLC scratch ram */
2161 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2162 		adev->gfx.rlc.starting_offsets_start);
2163 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2164 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2165 		       indirect_start_offsets[i]);
2166 
2167 	/* load unique indirect regs */
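	/* the low 18 bits of each register select the address; the bits above bit 19 are written via CNTL_DATA */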
2168 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2169 		if (unique_indirect_regs[i] != 0) {
2170 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2171 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2172 			       unique_indirect_regs[i] & 0x3FFFF);
2173 
2174 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2175 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2176 			       unique_indirect_regs[i] >> 20);
2177 		}
2178 	}
2179 
2180 	kfree(register_list_format);
2181 	return 0;
2182 }
2183 
2184 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2185 {
2186 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2187 }
2188 
2189 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2190 					     bool enable)
2191 {
2192 	uint32_t data = 0;
2193 	uint32_t default_data = 0;
2194 
2195 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2196 	if (enable) {
2197 		/* enable GFXIP control over CGPG */
2198 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2199 		if (default_data != data)
2200 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2201 
2202 		/* update status */
2203 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2204 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2205 		if (default_data != data)
2206 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2207 	} else {
2208 		/* restore GFXIP control over CGPG */
2209 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2210 		if (default_data != data)
2211 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2212 	}
2213 }
2214 
2215 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2216 {
2217 	uint32_t data = 0;
2218 
2219 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2220 			      AMD_PG_SUPPORT_GFX_SMG |
2221 			      AMD_PG_SUPPORT_GFX_DMG)) {
2222 		/* init IDLE_POLL_COUNT = 60 */
2223 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2224 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2225 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2226 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2227 
2228 		/* init RLC PG Delay */
2229 		data = 0;
2230 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2231 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2232 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2233 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2234 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2235 
2236 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2237 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2238 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2239 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2240 
2241 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2242 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2243 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2244 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2245 
2246 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2247 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2248 
2249 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2250 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2251 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2252 
2253 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2254 	}
2255 }
2256 
2257 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2258 						bool enable)
2259 {
2260 	uint32_t data = 0;
2261 	uint32_t default_data = 0;
2262 
2263 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2264 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2265 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2266 			     enable ? 1 : 0);
2267 	if (default_data != data)
2268 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2269 }
2270 
2271 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2272 						bool enable)
2273 {
2274 	uint32_t data = 0;
2275 	uint32_t default_data = 0;
2276 
2277 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2278 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2279 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2280 			     enable ? 1 : 0);
2281 	if (default_data != data)
2282 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2283 }
2284 
2285 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2286 					bool enable)
2287 {
2288 	uint32_t data = 0;
2289 	uint32_t default_data = 0;
2290 
2291 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2292 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2293 			     CP_PG_DISABLE,
2294 			     enable ? 0 : 1);
2295 	if (default_data != data)
2296 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2297 }
2298 
2299 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2300 						bool enable)
2301 {
2302 	uint32_t data, default_data;
2303 
2304 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2305 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2306 			     GFX_POWER_GATING_ENABLE,
2307 			     enable ? 1 : 0);
2308 	if (default_data != data)
2309 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2310 }
2311 
2312 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2313 						bool enable)
2314 {
2315 	uint32_t data, default_data;
2316 
2317 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2318 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2319 			     GFX_PIPELINE_PG_ENABLE,
2320 			     enable ? 1 : 0);
2321 	if (default_data != data)
2322 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2323 
2324 	if (!enable)
2325 		/* read any GFX register to wake up GFX */
2326 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2327 }
2328 
2329 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2330 						       bool enable)
2331 {
2332 	uint32_t data, default_data;
2333 
2334 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2335 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2336 			     STATIC_PER_CU_PG_ENABLE,
2337 			     enable ? 1 : 0);
2338 	if (default_data != data)
2339 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2340 }
2341 
2342 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2343 						bool enable)
2344 {
2345 	uint32_t data, default_data;
2346 
2347 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2348 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2349 			     DYN_PER_CU_PG_ENABLE,
2350 			     enable ? 1 : 0);
2351 	if (default_data != data)
2352 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2353 }
2354 
2355 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2356 {
2357 	gfx_v9_0_init_csb(adev);
2358 
2359 	/*
2360 	 * The RLC save/restore list is supported from RLC v2_1 onwards
2361 	 * and is needed by the gfxoff feature.
2362 	 */
2363 	if (adev->gfx.rlc.is_rlc_v2_1) {
2364 		gfx_v9_1_init_rlc_save_restore_list(adev);
2365 		gfx_v9_0_enable_save_restore_machine(adev);
2366 	}
2367 
2368 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2369 			      AMD_PG_SUPPORT_GFX_SMG |
2370 			      AMD_PG_SUPPORT_GFX_DMG |
2371 			      AMD_PG_SUPPORT_CP |
2372 			      AMD_PG_SUPPORT_GDS |
2373 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2374 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2375 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2376 		gfx_v9_0_init_gfx_power_gating(adev);
2377 	}
2378 }
2379 
2380 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2381 {
2382 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2383 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2384 	gfx_v9_0_wait_for_rlc_serdes(adev);
2385 }
2386 
2387 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2388 {
2389 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2390 	udelay(50);
2391 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2392 	udelay(50);
2393 }
2394 
2395 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2396 {
2397 #ifdef AMDGPU_RLC_DEBUG_RETRY
2398 	u32 rlc_ucode_ver;
2399 #endif
2400 
2401 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2402 	udelay(50);
2403 
2404 	/* APUs such as Carrizo enable the CP interrupt only after the CP is initialized */
2405 	if (!(adev->flags & AMD_IS_APU)) {
2406 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2407 		udelay(50);
2408 	}
2409 
2410 #ifdef AMDGPU_RLC_DEBUG_RETRY
2411 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2412 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2413 	if (rlc_ucode_ver == 0x108) {
2414 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i \n",
2415 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2416 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2417 		 * default is 0x9C4 to create a 100us interval */
2418 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2419 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2420 		 * to disable the page fault retry interrupts, default is
2421 		 * 0x100 (256) */
2422 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2423 	}
2424 #endif
2425 }
2426 
2427 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2428 {
2429 	const struct rlc_firmware_header_v2_0 *hdr;
2430 	const __le32 *fw_data;
2431 	unsigned i, fw_size;
2432 
2433 	if (!adev->gfx.rlc_fw)
2434 		return -EINVAL;
2435 
2436 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2437 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2438 
2439 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2440 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2441 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2442 
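	/* the ucode is streamed dword by dword through what appears to be an auto-incrementing ADDR/DATA pair */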
2443 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2444 			RLCG_UCODE_LOADING_START_ADDRESS);
2445 	for (i = 0; i < fw_size; i++)
2446 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2447 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2448 
2449 	return 0;
2450 }
2451 
2452 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2453 {
2454 	int r;
2455 
2456 	if (amdgpu_sriov_vf(adev)) {
2457 		gfx_v9_0_init_csb(adev);
2458 		return 0;
2459 	}
2460 
2461 	adev->gfx.rlc.funcs->stop(adev);
2462 
2463 	/* disable CG */
2464 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2465 
2466 	gfx_v9_0_init_pg(adev);
2467 
2468 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2469 		/* legacy rlc firmware loading */
2470 		r = gfx_v9_0_rlc_load_microcode(adev);
2471 		if (r)
2472 			return r;
2473 	}
2474 
2475 	switch (adev->asic_type) {
2476 	case CHIP_RAVEN:
2477 		if (amdgpu_lbpw == 0)
2478 			gfx_v9_0_enable_lbpw(adev, false);
2479 		else
2480 			gfx_v9_0_enable_lbpw(adev, true);
2481 		break;
2482 	case CHIP_VEGA20:
2483 		if (amdgpu_lbpw > 0)
2484 			gfx_v9_0_enable_lbpw(adev, true);
2485 		else
2486 			gfx_v9_0_enable_lbpw(adev, false);
2487 		break;
2488 	default:
2489 		break;
2490 	}
2491 
2492 	adev->gfx.rlc.funcs->start(adev);
2493 
2494 	return 0;
2495 }
2496 
2497 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2498 {
2499 	int i;
2500 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2501 
2502 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2503 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2504 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2505 	if (!enable) {
2506 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2507 			adev->gfx.gfx_ring[i].sched.ready = false;
2508 	}
2509 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2510 	udelay(50);
2511 }
2512 
2513 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2514 {
2515 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2516 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2517 	const struct gfx_firmware_header_v1_0 *me_hdr;
2518 	const __le32 *fw_data;
2519 	unsigned i, fw_size;
2520 
2521 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2522 		return -EINVAL;
2523 
2524 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2525 		adev->gfx.pfp_fw->data;
2526 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2527 		adev->gfx.ce_fw->data;
2528 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2529 		adev->gfx.me_fw->data;
2530 
2531 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2532 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2533 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2534 
2535 	gfx_v9_0_cp_gfx_enable(adev, false);
2536 
2537 	/* PFP */
2538 	fw_data = (const __le32 *)
2539 		(adev->gfx.pfp_fw->data +
2540 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2541 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2542 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2543 	for (i = 0; i < fw_size; i++)
2544 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2545 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2546 
2547 	/* CE */
2548 	fw_data = (const __le32 *)
2549 		(adev->gfx.ce_fw->data +
2550 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2551 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2552 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2553 	for (i = 0; i < fw_size; i++)
2554 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2555 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2556 
2557 	/* ME */
2558 	fw_data = (const __le32 *)
2559 		(adev->gfx.me_fw->data +
2560 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2561 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2562 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2563 	for (i = 0; i < fw_size; i++)
2564 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2565 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2566 
2567 	return 0;
2568 }
2569 
2570 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2571 {
2572 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2573 	const struct cs_section_def *sect = NULL;
2574 	const struct cs_extent_def *ext = NULL;
2575 	int r, i, tmp;
2576 
2577 	/* init the CP */
2578 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2579 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2580 
2581 	gfx_v9_0_cp_gfx_enable(adev, true);
2582 
2583 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2584 	if (r) {
2585 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2586 		return r;
2587 	}
2588 
2589 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2590 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2591 
2592 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2593 	amdgpu_ring_write(ring, 0x80000000);
2594 	amdgpu_ring_write(ring, 0x80000000);
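	/* bit 31 (0x80000000) presumably flags both control-mask dwords as valid updates */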
2595 
2596 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2597 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2598 			if (sect->id == SECT_CONTEXT) {
2599 				amdgpu_ring_write(ring,
2600 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2601 					       ext->reg_count));
2602 				amdgpu_ring_write(ring,
2603 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2604 				for (i = 0; i < ext->reg_count; i++)
2605 					amdgpu_ring_write(ring, ext->extent[i]);
2606 			}
2607 		}
2608 	}
2609 
2610 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2611 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2612 
2613 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2614 	amdgpu_ring_write(ring, 0);
2615 
2616 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2617 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2618 	amdgpu_ring_write(ring, 0x8000);
2619 	amdgpu_ring_write(ring, 0x8000);
2620 
2621 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2622 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2623 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2624 	amdgpu_ring_write(ring, tmp);
2625 	amdgpu_ring_write(ring, 0);
2626 
2627 	amdgpu_ring_commit(ring);
2628 
2629 	return 0;
2630 }
2631 
2632 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2633 {
2634 	struct amdgpu_ring *ring;
2635 	u32 tmp;
2636 	u32 rb_bufsz;
2637 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2638 
2639 	/* Set the write pointer delay */
2640 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2641 
2642 	/* set the RB to use vmid 0 */
2643 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2644 
2645 	/* Set ring buffer size */
2646 	ring = &adev->gfx.gfx_ring[0];
2647 	rb_bufsz = order_base_2(ring->ring_size / 8);
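	/* RB_BUFSZ is the log2 of the ring size in 8-byte units; RB_BLKSZ two steps smaller makes each block a quarter of the ring */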
2648 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2649 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2650 #ifdef __BIG_ENDIAN
2651 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2652 #endif
2653 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2654 
2655 	/* Initialize the ring buffer's write pointers */
2656 	ring->wptr = 0;
2657 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2658 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2659 
2660 	/* set the wb address whether it's enabled or not */
2661 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2662 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2663 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2664 
2665 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2666 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2667 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2668 
2669 	mdelay(1);
2670 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2671 
2672 	rb_addr = ring->gpu_addr >> 8;
2673 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2674 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2675 
2676 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2677 	if (ring->use_doorbell) {
2678 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2679 				    DOORBELL_OFFSET, ring->doorbell_index);
2680 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2681 				    DOORBELL_EN, 1);
2682 	} else {
2683 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2684 	}
2685 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2686 
2687 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2688 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
2689 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2690 
2691 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2692 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2693 
2695 	/* start the ring */
2696 	gfx_v9_0_cp_gfx_start(adev);
2697 	ring->sched.ready = true;
2698 
2699 	return 0;
2700 }
2701 
2702 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2703 {
2704 	int i;
2705 
2706 	if (enable) {
2707 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2708 	} else {
2709 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2710 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2711 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
2712 			adev->gfx.compute_ring[i].sched.ready = false;
2713 		adev->gfx.kiq.ring.sched.ready = false;
2714 	}
2715 	udelay(50);
2716 }
2717 
2718 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2719 {
2720 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2721 	const __le32 *fw_data;
2722 	unsigned i;
2723 	u32 tmp;
2724 
2725 	if (!adev->gfx.mec_fw)
2726 		return -EINVAL;
2727 
2728 	gfx_v9_0_cp_compute_enable(adev, false);
2729 
2730 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2731 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2732 
2733 	fw_data = (const __le32 *)
2734 		(adev->gfx.mec_fw->data +
2735 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2736 	tmp = 0;
2737 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2738 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2739 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2740 
2741 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2742 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2743 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2744 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
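	/* the CP presumably fetches the main ucode via the instruction cache base programmed above; only the jump table is written through the register interface below */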
2745 
2746 	/* MEC1 */
2747 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2748 			 mec_hdr->jt_offset);
2749 	for (i = 0; i < mec_hdr->jt_size; i++)
2750 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2751 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2752 
2753 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2754 			adev->gfx.mec_fw_version);
2755 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run microcode different from MEC1's. */
2756 
2757 	return 0;
2758 }
2759 
2760 /* KIQ functions */
2761 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2762 {
2763 	uint32_t tmp;
2764 	struct amdgpu_device *adev = ring->adev;
2765 
2766 	/* tell RLC which is KIQ queue */
2767 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2768 	tmp &= 0xffffff00;
2769 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2770 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
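	/* a second write appears to latch the selection by setting bit 7 */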
2771 	tmp |= 0x80;
2772 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2773 }
2774 
2775 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2776 {
2777 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2778 	uint64_t queue_mask = 0;
2779 	int r, i;
2780 
2781 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2782 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2783 			continue;
2784 
2785 		/* This situation may be hit in the future if a new HW
2786 		 * generation exposes more than 64 queues. If so, the
2787 		 * definition of queue_mask needs updating */
2788 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2789 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2790 			break;
2791 		}
2792 
2793 		queue_mask |= (1ull << i);
2794 	}
2795 
2796 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2797 	if (r) {
2798 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2799 		return r;
2800 	}
2801 
2802 	/* set resources */
2803 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2804 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2805 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
2806 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
2807 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
2808 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
2809 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
2810 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
2811 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
2812 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2813 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2814 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2815 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2816 
2817 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2819 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2820 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2821 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2822 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2823 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2824 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2825 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2826 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2827 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2828 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2829 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2830 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2831 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2832 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2833 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2834 	}
2835 
2836 	r = amdgpu_ring_test_helper(kiq_ring);
2837 	if (r)
2838 		DRM_ERROR("KCQ enable failed\n");
2839 
2840 	return r;
2841 }
2842 
2843 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2844 {
2845 	struct amdgpu_device *adev = ring->adev;
2846 	struct v9_mqd *mqd = ring->mqd_ptr;
2847 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2848 	uint32_t tmp;
2849 
2850 	mqd->header = 0xC0310800;
2851 	mqd->compute_pipelinestat_enable = 0x00000001;
2852 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2853 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2854 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2855 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2856 	mqd->compute_misc_reserved = 0x00000003;
2857 
2858 	mqd->dynamic_cu_mask_addr_lo =
2859 		lower_32_bits(ring->mqd_gpu_addr
2860 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2861 	mqd->dynamic_cu_mask_addr_hi =
2862 		upper_32_bits(ring->mqd_gpu_addr
2863 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2864 
2865 	eop_base_addr = ring->eop_gpu_addr >> 8;
2866 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2867 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
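	/* the EOP base address is 256-byte aligned, hence the >> 8 above */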
2868 
2869 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2870 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2871 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2872 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2873 
2874 	mqd->cp_hqd_eop_control = tmp;
2875 
2876 	/* enable doorbell? */
2877 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2878 
2879 	if (ring->use_doorbell) {
2880 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2881 				    DOORBELL_OFFSET, ring->doorbell_index);
2882 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2883 				    DOORBELL_EN, 1);
2884 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2885 				    DOORBELL_SOURCE, 0);
2886 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2887 				    DOORBELL_HIT, 0);
2888 	} else {
2889 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2890 					 DOORBELL_EN, 0);
2891 	}
2892 
2893 	mqd->cp_hqd_pq_doorbell_control = tmp;
2894 
2895 	/* disable the queue if it's active */
2896 	ring->wptr = 0;
2897 	mqd->cp_hqd_dequeue_request = 0;
2898 	mqd->cp_hqd_pq_rptr = 0;
2899 	mqd->cp_hqd_pq_wptr_lo = 0;
2900 	mqd->cp_hqd_pq_wptr_hi = 0;
2901 
2902 	/* set the pointer to the MQD */
2903 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2904 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2905 
2906 	/* set MQD vmid to 0 */
2907 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2908 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2909 	mqd->cp_mqd_control = tmp;
2910 
2911 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2912 	hqd_gpu_addr = ring->gpu_addr >> 8;
2913 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2914 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2915 
2916 	/* set up the HQD, this is similar to CP_RB0_CNTL */
2917 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2918 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2919 			    (order_base_2(ring->ring_size / 4) - 1));
2920 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2921 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2922 #ifdef __BIG_ENDIAN
2923 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2924 #endif
2925 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2926 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2927 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2928 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2929 	mqd->cp_hqd_pq_control = tmp;
2930 
2931 	/* set the wb address whether it's enabled or not */
2932 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2933 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2934 	mqd->cp_hqd_pq_rptr_report_addr_hi =
2935 		upper_32_bits(wb_gpu_addr) & 0xffff;
2936 
2937 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2938 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2939 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2940 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2941 
2942 	tmp = 0;
2943 	/* enable the doorbell if requested */
2944 	if (ring->use_doorbell) {
2945 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2946 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2947 				DOORBELL_OFFSET, ring->doorbell_index);
2948 
2949 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2950 					 DOORBELL_EN, 1);
2951 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2952 					 DOORBELL_SOURCE, 0);
2953 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2954 					 DOORBELL_HIT, 0);
2955 	}
2956 
2957 	mqd->cp_hqd_pq_doorbell_control = tmp;
2958 
2959 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2960 	ring->wptr = 0;
2961 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2962 
2963 	/* set the vmid for the queue */
2964 	mqd->cp_hqd_vmid = 0;
2965 
2966 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2967 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2968 	mqd->cp_hqd_persistent_state = tmp;
2969 
2970 	/* set MIN_IB_AVAIL_SIZE */
2971 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2972 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2973 	mqd->cp_hqd_ib_control = tmp;
2974 
2975 	/* activate the queue */
2976 	mqd->cp_hqd_active = 1;
2977 
2978 	return 0;
2979 }
2980 
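/*
 * Program the currently selected HQD from the staged MQD image. Callers
 * must hold srbm_mutex and route register access to the target queue
 * first; in outline (matching the call sites below):
 *
 *	mutex_lock(&adev->srbm_mutex);
 *	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 *	gfx_v9_0_kiq_init_register(ring);
 *	soc15_grbm_select(adev, 0, 0, 0, 0);
 *	mutex_unlock(&adev->srbm_mutex);
 */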
2981 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2982 {
2983 	struct amdgpu_device *adev = ring->adev;
2984 	struct v9_mqd *mqd = ring->mqd_ptr;
2985 	int j;
2986 
2987 	/* disable wptr polling */
2988 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2989 
2990 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2991 	       mqd->cp_hqd_eop_base_addr_lo);
2992 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2993 	       mqd->cp_hqd_eop_base_addr_hi);
2994 
2995 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2996 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2997 	       mqd->cp_hqd_eop_control);
2998 
2999 	/* enable doorbell? */
3000 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3001 	       mqd->cp_hqd_pq_doorbell_control);
3002 
3003 	/* disable the queue if it's active */
3004 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3005 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3006 		for (j = 0; j < adev->usec_timeout; j++) {
3007 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3008 				break;
3009 			udelay(1);
3010 		}
3011 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3012 		       mqd->cp_hqd_dequeue_request);
3013 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3014 		       mqd->cp_hqd_pq_rptr);
3015 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3016 		       mqd->cp_hqd_pq_wptr_lo);
3017 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3018 		       mqd->cp_hqd_pq_wptr_hi);
3019 	}
3020 
3021 	/* set the pointer to the MQD */
3022 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3023 	       mqd->cp_mqd_base_addr_lo);
3024 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3025 	       mqd->cp_mqd_base_addr_hi);
3026 
3027 	/* set MQD vmid to 0 */
3028 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3029 	       mqd->cp_mqd_control);
3030 
3031 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3032 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3033 	       mqd->cp_hqd_pq_base_lo);
3034 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3035 	       mqd->cp_hqd_pq_base_hi);
3036 
3037 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3038 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3039 	       mqd->cp_hqd_pq_control);
3040 
3041 	/* set the wb address whether it's enabled or not */
3042 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3043 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3044 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3045 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3046 
3047 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3048 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3049 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3050 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3051 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3052 
3053 	/* enable the doorbell if requested */
3054 	if (ring->use_doorbell) {
3055 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3056 					(adev->doorbell_index.kiq * 2) << 2);
3057 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3058 					(adev->doorbell_index.userqueue_end * 2) << 2);
3059 	}
3060 
3061 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3062 	       mqd->cp_hqd_pq_doorbell_control);
3063 
3064 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3065 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3066 	       mqd->cp_hqd_pq_wptr_lo);
3067 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3068 	       mqd->cp_hqd_pq_wptr_hi);
3069 
3070 	/* set the vmid for the queue */
3071 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3072 
3073 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3074 	       mqd->cp_hqd_persistent_state);
3075 
3076 	/* activate the queue */
3077 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3078 	       mqd->cp_hqd_active);
3079 
3080 	if (ring->use_doorbell)
3081 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3082 
3083 	return 0;
3084 }
3085 
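/*
 * Tear down the currently selected KIQ HQD: request a dequeue, poll up to
 * adev->usec_timeout microseconds for the queue to go inactive (forcing
 * CP_HQD_ACTIVE to 0 if it never does), then scrub the doorbell and
 * read/write pointer registers. As with the init path, the caller selects
 * the queue via soc15_grbm_select() under srbm_mutex.
 */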
3086 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3087 {
3088 	struct amdgpu_device *adev = ring->adev;
3089 	int j;
3090 
3091 	/* disable the queue if it's active */
3092 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3093 
3094 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3095 
3096 		for (j = 0; j < adev->usec_timeout; j++) {
3097 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3098 				break;
3099 			udelay(1);
3100 		}
3101 
3102 		if (j == adev->usec_timeout) {
3103 			DRM_DEBUG("KIQ dequeue request failed.\n");
3104 
3105 			/* Manual disable if dequeue request times out */
3106 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3107 		}
3108 
3109 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3110 		      0);
3111 	}
3112 
3113 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3114 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3115 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3116 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3117 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3118 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3119 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3120 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3121 
3122 	return 0;
3123 }
3124 
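/*
 * (Re)initialize the KIQ. On a GPU reset the saved MQD backup is restored
 * and only the HQD registers are reprogrammed; on first init a fresh MQD
 * is built and then backed up so later resets can reuse it.
 */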
3125 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3126 {
3127 	struct amdgpu_device *adev = ring->adev;
3128 	struct v9_mqd *mqd = ring->mqd_ptr;
3129 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3130 
3131 	gfx_v9_0_kiq_setting(ring);
3132 
3133 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3134 		/* reset MQD to a clean status */
3135 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3136 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3137 
3138 		/* reset ring buffer */
3139 		ring->wptr = 0;
3140 		amdgpu_ring_clear_ring(ring);
3141 
3142 		mutex_lock(&adev->srbm_mutex);
3143 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3144 		gfx_v9_0_kiq_init_register(ring);
3145 		soc15_grbm_select(adev, 0, 0, 0, 0);
3146 		mutex_unlock(&adev->srbm_mutex);
3147 	} else {
3148 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3149 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3150 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3151 		mutex_lock(&adev->srbm_mutex);
3152 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3153 		gfx_v9_0_mqd_init(ring);
3154 		gfx_v9_0_kiq_init_register(ring);
3155 		soc15_grbm_select(adev, 0, 0, 0, 0);
3156 		mutex_unlock(&adev->srbm_mutex);
3157 
3158 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3159 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3160 	}
3161 
3162 	return 0;
3163 }
3164 
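/*
 * Same split for the user compute queues (KCQs), except no registers are
 * written here: KCQs are mapped onto hardware later through the KIQ, so
 * only the MQD image is prepared (or restored from backup on reset).
 */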
3165 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3166 {
3167 	struct amdgpu_device *adev = ring->adev;
3168 	struct v9_mqd *mqd = ring->mqd_ptr;
3169 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3170 
3171 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3172 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3173 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3174 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3175 		mutex_lock(&adev->srbm_mutex);
3176 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3177 		gfx_v9_0_mqd_init(ring);
3178 		soc15_grbm_select(adev, 0, 0, 0, 0);
3179 		mutex_unlock(&adev->srbm_mutex);
3180 
3181 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3182 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3183 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3184 		/* reset MQD to a clean status */
3185 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3186 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3187 
3188 		/* reset ring buffer */
3189 		ring->wptr = 0;
3190 		amdgpu_ring_clear_ring(ring);
3191 	} else {
3192 		amdgpu_ring_clear_ring(ring);
3193 	}
3194 
3195 	return 0;
3196 }
3197 
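/*
 * Map the KIQ's MQD BO just long enough to build and program the queue,
 * then unmap it again; the MQD stays resident in the BO for the CP to
 * fetch.
 */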
3198 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3199 {
3200 	struct amdgpu_ring *ring;
3201 	int r;
3202 
3203 	ring = &adev->gfx.kiq.ring;
3204 
3205 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3206 	if (unlikely(r != 0))
3207 		return r;
3208 
3209 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3210 	if (unlikely(r != 0)) {
		amdgpu_bo_unreserve(ring->mqd_obj);
3211 		return r;
	}
3212 
3213 	gfx_v9_0_kiq_init_queue(ring);
3214 	amdgpu_bo_kunmap(ring->mqd_obj);
3215 	ring->mqd_ptr = NULL;
3216 	amdgpu_bo_unreserve(ring->mqd_obj);
3217 	ring->sched.ready = true;
3218 	return 0;
3219 }
3220 
3221 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3222 {
3223 	struct amdgpu_ring *ring = NULL;
3224 	int r = 0, i;
3225 
3226 	gfx_v9_0_cp_compute_enable(adev, true);
3227 
3228 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3229 		ring = &adev->gfx.compute_ring[i];
3230 
3231 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3232 		if (unlikely(r != 0))
3233 			goto done;
3234 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3235 		if (!r) {
3236 			r = gfx_v9_0_kcq_init_queue(ring);
3237 			amdgpu_bo_kunmap(ring->mqd_obj);
3238 			ring->mqd_ptr = NULL;
3239 		}
3240 		amdgpu_bo_unreserve(ring->mqd_obj);
3241 		if (r)
3242 			goto done;
3243 	}
3244 
3245 	r = gfx_v9_0_kiq_kcq_enable(adev);
3246 done:
3247 	return r;
3248 }
3249 
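/*
 * Bring up the whole command processor. The order matters: microcode
 * first (legacy, non-PSP load path only), then the KIQ so it can service
 * map requests, then the gfx ring, then the compute queues, and finally
 * ring tests to mark each ring ready.
 */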
3250 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3251 {
3252 	int r, i;
3253 	struct amdgpu_ring *ring;
3254 
3255 	if (!(adev->flags & AMD_IS_APU))
3256 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3257 
3258 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3259 		/* legacy firmware loading */
3260 		r = gfx_v9_0_cp_gfx_load_microcode(adev);
3261 		if (r)
3262 			return r;
3263 
3264 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3265 		if (r)
3266 			return r;
3267 	}
3268 
3269 	r = gfx_v9_0_kiq_resume(adev);
3270 	if (r)
3271 		return r;
3272 
3273 	r = gfx_v9_0_cp_gfx_resume(adev);
3274 	if (r)
3275 		return r;
3276 
3277 	r = gfx_v9_0_kcq_resume(adev);
3278 	if (r)
3279 		return r;
3280 
3281 	ring = &adev->gfx.gfx_ring[0];
3282 	r = amdgpu_ring_test_helper(ring);
3283 	if (r)
3284 		return r;
3285 
3286 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3287 		ring = &adev->gfx.compute_ring[i];
3288 		amdgpu_ring_test_helper(ring);
3289 	}
3290 
3291 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3292 
3293 	return 0;
3294 }
3295 
3296 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3297 {
3298 	gfx_v9_0_cp_gfx_enable(adev, enable);
3299 	gfx_v9_0_cp_compute_enable(adev, enable);
3300 }
3301 
3302 static int gfx_v9_0_hw_init(void *handle)
3303 {
3304 	int r;
3305 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3306 
3307 	gfx_v9_0_init_golden_registers(adev);
3308 
3309 	gfx_v9_0_constants_init(adev);
3310 
3311 	r = gfx_v9_0_csb_vram_pin(adev);
3312 	if (r)
3313 		return r;
3314 
3315 	r = adev->gfx.rlc.funcs->resume(adev);
3316 	if (r)
3317 		return r;
3318 
3319 	r = gfx_v9_0_cp_resume(adev);
3320 	if (r)
3321 		return r;
3322 
3323 	r = gfx_v9_0_ngg_en(adev);
3324 	if (r)
3325 		return r;
3326 
3327 	return r;
3328 }
3329 
3330 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3331 {
3332 	int r, i;
3333 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3334 
3335 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3336 	if (r) {
3337 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
3338 
3339 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3340 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3341 
3342 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3343 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3344 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3345 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3346 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3347 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3348 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3349 		amdgpu_ring_write(kiq_ring, 0);
3350 		amdgpu_ring_write(kiq_ring, 0);
3351 		amdgpu_ring_write(kiq_ring, 0);
3352 	}
3353 	r = amdgpu_ring_test_helper(kiq_ring);
3354 	if (r)
3355 		DRM_ERROR("KCQ disable failed\n");
3356 
3357 	return r;
3358 }
3359 
3360 static int gfx_v9_0_hw_fini(void *handle)
3361 {
3362 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3363 
3364 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3365 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3366 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3367 
3368 	/* disable KCQs so the CPC does not keep touching memory that is about to become invalid */
3369 	gfx_v9_0_kcq_disable(adev);
3370 
3371 	if (amdgpu_sriov_vf(adev)) {
3372 		gfx_v9_0_cp_gfx_enable(adev, false);
3373 		/* must disable wptr polling for SR-IOV once the hw is finished;
3374 		 * otherwise the CPC engine may keep fetching the WB address,
3375 		 * which is already invalid after the sw side has torn down, and
3376 		 * trigger DMAR read errors on the hypervisor side.
3377 		 */
3378 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3379 		return 0;
3380 	}
3381 
3382 	/* Use the deinitialize sequence from CAIL when unbinding the device from
3383 	 * the driver; otherwise the KIQ hangs when the device is bound back.
3384 	 */
3385 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3386 		mutex_lock(&adev->srbm_mutex);
3387 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3388 				adev->gfx.kiq.ring.pipe,
3389 				adev->gfx.kiq.ring.queue, 0);
3390 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3391 		soc15_grbm_select(adev, 0, 0, 0, 0);
3392 		mutex_unlock(&adev->srbm_mutex);
3393 	}
3394 
3395 	gfx_v9_0_cp_enable(adev, false);
3396 	adev->gfx.rlc.funcs->stop(adev);
3397 
3398 	gfx_v9_0_csb_vram_unpin(adev);
3399 
3400 	return 0;
3401 }
3402 
3403 static int gfx_v9_0_suspend(void *handle)
3404 {
3405 	return gfx_v9_0_hw_fini(handle);
3406 }
3407 
3408 static int gfx_v9_0_resume(void *handle)
3409 {
3410 	return gfx_v9_0_hw_init(handle);
3411 }
3412 
3413 static bool gfx_v9_0_is_idle(void *handle)
3414 {
3415 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3416 
3417 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3418 				GRBM_STATUS, GUI_ACTIVE))
3419 		return false;
3420 	else
3421 		return true;
3422 }
3423 
3424 static int gfx_v9_0_wait_for_idle(void *handle)
3425 {
3426 	unsigned i;
3427 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3428 
3429 	for (i = 0; i < adev->usec_timeout; i++) {
3430 		if (gfx_v9_0_is_idle(handle))
3431 			return 0;
3432 		udelay(1);
3433 	}
3434 	return -ETIMEDOUT;
3435 }
3436 
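/*
 * Derive the needed GRBM soft-reset bits from the busy flags in
 * GRBM_STATUS/GRBM_STATUS2, then pulse them with the RLC halted and the
 * CP gfx/compute engines disabled.
 */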
3437 static int gfx_v9_0_soft_reset(void *handle)
3438 {
3439 	u32 grbm_soft_reset = 0;
3440 	u32 tmp;
3441 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3442 
3443 	/* GRBM_STATUS */
3444 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3445 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3446 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3447 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3448 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3449 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3450 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3451 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3452 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3453 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3454 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3455 	}
3456 
3457 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3458 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3459 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3460 	}
3461 
3462 	/* GRBM_STATUS2 */
3463 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3464 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3465 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3466 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3467 
3468 
3469 	if (grbm_soft_reset) {
3470 		/* stop the rlc */
3471 		adev->gfx.rlc.funcs->stop(adev);
3472 
3473 		/* Disable GFX parsing/prefetching */
3474 		gfx_v9_0_cp_gfx_enable(adev, false);
3475 
3476 		/* Disable MEC parsing/prefetching */
3477 		gfx_v9_0_cp_compute_enable(adev, false);
3478 
3480 		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3481 		tmp |= grbm_soft_reset;
3482 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3483 		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3484 		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3485 
3486 		udelay(50);
3487 
3488 		tmp &= ~grbm_soft_reset;
3489 		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3490 		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3492 
3493 		/* Wait a little for things to settle down */
3494 		udelay(50);
3495 	}
3496 	return 0;
3497 }
3498 
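/*
 * Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running GPU clock
 * counter, so the LSB/MSB halves read back below form a coherent 64-bit
 * sample; gpu_clock_mutex serializes concurrent captures.
 */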
3499 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3500 {
3501 	uint64_t clock;
3502 
3503 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3504 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3505 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3506 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3507 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3508 	return clock;
3509 }
3510 
3511 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3512 					  uint32_t vmid,
3513 					  uint32_t gds_base, uint32_t gds_size,
3514 					  uint32_t gws_base, uint32_t gws_size,
3515 					  uint32_t oa_base, uint32_t oa_size)
3516 {
3517 	struct amdgpu_device *adev = ring->adev;
3518 
3519 	/* GDS Base */
3520 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3521 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3522 				   gds_base);
3523 
3524 	/* GDS Size */
3525 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3526 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3527 				   gds_size);
3528 
3529 	/* GWS */
3530 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3531 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3532 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3533 
3534 	/* OA */
3535 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3536 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3537 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3538 }
3539 
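/*
 * Hand-assembled GFX9 shader binaries used by the EDC/RAS workarounds
 * below: the first initializes the VGPRs, the second the SGPRs, so the
 * GPR banks start out with clean ECC/parity state.
 */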
3540 static const u32 vgpr_init_compute_shader[] =
3541 {
3542 	0xb07c0000, 0xbe8000ff,
3543 	0x000000f8, 0xbf110800,
3544 	0x7e000280, 0x7e020280,
3545 	0x7e040280, 0x7e060280,
3546 	0x7e080280, 0x7e0a0280,
3547 	0x7e0c0280, 0x7e0e0280,
3548 	0x80808800, 0xbe803200,
3549 	0xbf84fff5, 0xbf9c0000,
3550 	0xd28c0001, 0x0001007f,
3551 	0xd28d0001, 0x0002027e,
3552 	0x10020288, 0xb8810904,
3553 	0xb7814000, 0xd1196a01,
3554 	0x00000301, 0xbe800087,
3555 	0xbefc00c1, 0xd89c4000,
3556 	0x00020201, 0xd89cc080,
3557 	0x00040401, 0x320202ff,
3558 	0x00000800, 0x80808100,
3559 	0xbf84fff8, 0x7e020280,
3560 	0xbf810000, 0x00000000,
3561 };
3562 
3563 static const u32 sgpr_init_compute_shader[] =
3564 {
3565 	0xb07c0000, 0xbe8000ff,
3566 	0x0000005f, 0xbee50080,
3567 	0xbe812c65, 0xbe822c65,
3568 	0xbe832c65, 0xbe842c65,
3569 	0xbe852c65, 0xb77c0005,
3570 	0x80808500, 0xbf84fff8,
3571 	0xbe800080, 0xbf810000,
3572 };
3573 
3574 static const struct soc15_reg_entry vgpr_init_regs[] = {
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3581    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3582    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3583    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3584    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3585 };
3586 
3587 static const struct soc15_reg_entry sgpr_init_regs[] = {
3588    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3589    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3590    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3591    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3592    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3593    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3594    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3595    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3596    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3597    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3598 };
3599 
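/*
 * EDC/ECC counters to read back (and thereby clear) after the GPR init
 * dispatches; the two trailing fields give the shader-engine and instance
 * counts each register must be read for.
 */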
3600 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3601    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3602    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3603    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3604    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3605    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3606    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3607    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3608    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3609    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3610    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3611    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3612    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3613    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3614    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3615    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3616    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3617    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3618    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3619    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3620    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3621    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3622    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3623    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3624    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3625    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3626    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3627    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3628    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3629    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3630    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3631    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3632    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3633 };
3634 
3635 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3636 {
3637 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3638 	int i, r;
3639 
3640 	r = amdgpu_ring_alloc(ring, 7);
3641 	if (r) {
3642 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3643 			ring->name, r);
3644 		return r;
3645 	}
3646 
3647 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3648 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3649 
3650 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3651 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3652 				PACKET3_DMA_DATA_DST_SEL(1) |
3653 				PACKET3_DMA_DATA_SRC_SEL(2) |
3654 				PACKET3_DMA_DATA_ENGINE(0)));
3655 	amdgpu_ring_write(ring, 0);
3656 	amdgpu_ring_write(ring, 0);
3657 	amdgpu_ring_write(ring, 0);
3658 	amdgpu_ring_write(ring, 0);
3659 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3660 				adev->gds.gds_size);
3661 
3662 	amdgpu_ring_commit(ring);
3663 
3664 	for (i = 0; i < adev->usec_timeout; i++) {
3665 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3666 			break;
3667 		udelay(1);
3668 	}
3669 
3670 	if (i >= adev->usec_timeout)
3671 		r = -ETIMEDOUT;
3672 
3673 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3674 
3675 	return r;
3676 }
3677 
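/*
 * Scrub the GPR banks by dispatching the two init shaders from one IB.
 * The size estimate below reserves, per shader: 3 dwords for each
 * SET_SH_REG register write, 4 for the COMPUTE_PGM_LO/HI write, 5 for the
 * DISPATCH_DIRECT packet and 2 for the CS partial flush event, with the
 * shader code itself placed at a 256-byte-aligned offset after the
 * packets.
 */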
3678 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3679 {
3680 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3681 	struct amdgpu_ib ib;
3682 	struct dma_fence *f = NULL;
3683 	int r, i, j, k;
3684 	unsigned total_size, vgpr_offset, sgpr_offset;
3685 	u64 gpu_addr;
3686 
3687 	/* only supported when RAS is enabled */
3688 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3689 		return 0;
3690 
3691 	/* bail if the compute ring is not ready */
3692 	if (!ring->sched.ready)
3693 		return 0;
3694 
3695 	total_size =
3696 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3697 	total_size +=
3698 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3699 	total_size = ALIGN(total_size, 256);
3700 	vgpr_offset = total_size;
3701 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3702 	sgpr_offset = total_size;
3703 	total_size += sizeof(sgpr_init_compute_shader);
3704 
3705 	/* allocate an indirect buffer to put the commands in */
3706 	memset(&ib, 0, sizeof(ib));
3707 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3708 	if (r) {
3709 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3710 		return r;
3711 	}
3712 
3713 	/* load the compute shaders */
3714 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3715 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3716 
3717 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3718 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3719 
3720 	/* init the ib length to 0 */
3721 	ib.length_dw = 0;
3722 
3723 	/* VGPR */
3724 	/* write the register state for the compute dispatch */
3725 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3726 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3727 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3728 								- PACKET3_SET_SH_REG_START;
3729 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3730 	}
3731 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3732 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3733 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3734 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3735 							- PACKET3_SET_SH_REG_START;
3736 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3737 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3738 
3739 	/* write dispatch packet */
3740 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3741 	ib.ptr[ib.length_dw++] = 128; /* x */
3742 	ib.ptr[ib.length_dw++] = 1; /* y */
3743 	ib.ptr[ib.length_dw++] = 1; /* z */
3744 	ib.ptr[ib.length_dw++] =
3745 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3746 
3747 	/* write CS partial flush packet */
3748 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3749 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3750 
3751 	/* SGPR */
3752 	/* write the register state for the compute dispatch */
3753 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3754 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3755 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3756 								- PACKET3_SET_SH_REG_START;
3757 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3758 	}
3759 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3760 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3761 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3762 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3763 							- PACKET3_SET_SH_REG_START;
3764 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3765 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3766 
3767 	/* write dispatch packet */
3768 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3769 	ib.ptr[ib.length_dw++] = 128; /* x */
3770 	ib.ptr[ib.length_dw++] = 1; /* y */
3771 	ib.ptr[ib.length_dw++] = 1; /* z */
3772 	ib.ptr[ib.length_dw++] =
3773 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3774 
3775 	/* write CS partial flush packet */
3776 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3777 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3778 
3779 	/* schedule the ib on the ring */
3780 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3781 	if (r) {
3782 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3783 		goto fail;
3784 	}
3785 
3786 	/* wait for the GPU to finish processing the IB */
3787 	r = dma_fence_wait(f, false);
3788 	if (r) {
3789 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3790 		goto fail;
3791 	}
3792 
3793 	/* read back registers to clear the counters */
3794 	mutex_lock(&adev->grbm_idx_mutex);
3795 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3796 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3797 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3798 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3799 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3800 			}
3801 		}
3802 	}
3803 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3804 	mutex_unlock(&adev->grbm_idx_mutex);
3805 
3806 fail:
3807 	amdgpu_ib_free(adev, &ib, NULL);
3808 	dma_fence_put(f);
3809 
3810 	return r;
3811 }
3812 
3813 static int gfx_v9_0_early_init(void *handle)
3814 {
3815 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3816 
3817 	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3818 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3819 	gfx_v9_0_set_ring_funcs(adev);
3820 	gfx_v9_0_set_irq_funcs(adev);
3821 	gfx_v9_0_set_gds_init(adev);
3822 	gfx_v9_0_set_rlc_funcs(adev);
3823 
3824 	return 0;
3825 }
3826 
3827 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3828 		struct amdgpu_iv_entry *entry);
3829 
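/*
 * Enable RAS for the GFX block: run the EDC workarounds, (re)enable the
 * RAS TA feature, and on first init register the interrupt handler plus
 * the sysfs/debugfs nodes. The error path unwinds in reverse order via
 * the labels at the bottom.
 */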
3830 static int gfx_v9_0_ecc_late_init(void *handle)
3831 {
3832 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3833 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
3834 	struct ras_ih_if ih_info = {
3835 		.cb = gfx_v9_0_process_ras_data_cb,
3836 	};
3837 	struct ras_fs_if fs_info = {
3838 		.sysfs_name = "gfx_err_count",
3839 		.debugfs_name = "gfx_err_inject",
3840 	};
3841 	struct ras_common_if ras_block = {
3842 		.block = AMDGPU_RAS_BLOCK__GFX,
3843 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3844 		.sub_block_index = 0,
3845 		.name = "gfx",
3846 	};
3847 	int r;
3848 
3849 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3850 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3851 		return 0;
3852 	}
3853 
3854 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
3855 	if (r)
3856 		return r;
3857 
3858 	/* requires IBs so do in late init after IB pool is initialized */
3859 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3860 	if (r)
3861 		return r;
3862 
3863 	/* handle resume path. */
3864 	if (*ras_if) {
3865 		/* re-send the RAS TA enable cmd during resume;
3866 		 * be prepared to handle failure.
3867 		 */
3868 		ih_info.head = **ras_if;
3869 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3870 		if (r) {
3871 			if (r == -EAGAIN) {
3872 				/* request a gpu reset; this will run again. */
3873 				amdgpu_ras_request_reset_on_boot(adev,
3874 						AMDGPU_RAS_BLOCK__GFX);
3875 				return 0;
3876 			}
3877 			/* failed to enable RAS; clean up everything. */
3878 			goto irq;
3879 		}
3880 		/* enable successfully. continue. */
3881 		/* enabled successfully; continue. */
3882 	}
3883 
3884 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3885 	if (!*ras_if)
3886 		return -ENOMEM;
3887 
3888 	**ras_if = ras_block;
3889 
3890 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3891 	if (r) {
3892 		if (r == -EAGAIN) {
3893 			amdgpu_ras_request_reset_on_boot(adev,
3894 					AMDGPU_RAS_BLOCK__GFX);
3895 			r = 0;
3896 		}
3897 		goto feature;
3898 	}
3899 
3900 	ih_info.head = **ras_if;
3901 	fs_info.head = **ras_if;
3902 
3903 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3904 	if (r)
3905 		goto interrupt;
3906 
3907 	amdgpu_ras_debugfs_create(adev, &fs_info);
3908 
3909 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
3910 	if (r)
3911 		goto sysfs;
3912 resume:
3913 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3914 	if (r)
3915 		goto irq;
3916 
3917 	return 0;
3918 irq:
3919 	amdgpu_ras_sysfs_remove(adev, *ras_if);
3920 sysfs:
3921 	amdgpu_ras_debugfs_remove(adev, *ras_if);
3922 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3923 interrupt:
3924 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
3925 feature:
3926 	kfree(*ras_if);
3927 	*ras_if = NULL;
3928 	return r;
3929 }
3930 
3931 static int gfx_v9_0_late_init(void *handle)
3932 {
3933 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3934 	int r;
3935 
3936 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3937 	if (r)
3938 		return r;
3939 
3940 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3941 	if (r)
3942 		return r;
3943 
3944 	r = gfx_v9_0_ecc_late_init(handle);
3945 	if (r)
3946 		return r;
3947 
3948 	return 0;
3949 }
3950 
3951 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3952 {
3953 	uint32_t rlc_setting;
3954 
3955 	/* if RLC is not enabled, do nothing */
3956 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3957 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3958 		return false;
3959 
3960 	return true;
3961 }
3962 
3963 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3964 {
3965 	uint32_t data;
3966 	unsigned i;
3967 
3968 	data = RLC_SAFE_MODE__CMD_MASK;
3969 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3970 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3971 
3972 	/* wait for RLC_SAFE_MODE */
3973 	for (i = 0; i < adev->usec_timeout; i++) {
3974 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3975 			break;
3976 		udelay(1);
3977 	}
3978 }
3979 
3980 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3981 {
3982 	uint32_t data;
3983 
3984 	data = RLC_SAFE_MODE__CMD_MASK;
3985 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3986 }
3987 
3988 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3989 						bool enable)
3990 {
3991 	amdgpu_gfx_rlc_enter_safe_mode(adev);
3992 
3993 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3994 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3995 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3996 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3997 	} else {
3998 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3999 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4000 	}
4001 
4002 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4003 }
4004 
4005 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4006 						bool enable)
4007 {
4008 	/* TODO: double check if we need to perform under safe mode */
4009 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4010 
4011 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4012 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4013 	else
4014 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4015 
4016 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4017 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4018 	else
4019 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4020 
4021 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4022 }
4023 
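/*
 * Medium-grain clock gating (MGCG/MGLS): clear or set the per-IP override
 * bits in RLC_CGTT_MGCG_OVERRIDE and toggle RLC/CP memory light sleep,
 * always from within RLC safe mode.
 */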
4024 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4025 						      bool enable)
4026 {
4027 	uint32_t data, def;
4028 
4029 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4030 
4031 	/* It is disabled by HW by default */
4032 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4033 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4034 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4035 
4036 		if (adev->asic_type != CHIP_VEGA12)
4037 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4038 
4039 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4040 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4041 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4042 
4043 		/* only for Vega10 & Raven1 */
4044 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4045 
4046 		if (def != data)
4047 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4048 
4049 		/* MGLS is a global flag to control all MGLS in GFX */
4050 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4051 			/* 2 - RLC memory Light sleep */
4052 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4053 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4054 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4055 				if (def != data)
4056 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4057 			}
4058 			/* 3 - CP memory Light sleep */
4059 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4060 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4061 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4062 				if (def != data)
4063 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4064 			}
4065 		}
4066 	} else {
4067 		/* 1 - MGCG_OVERRIDE */
4068 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4069 
4070 		if (adev->asic_type != CHIP_VEGA12)
4071 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4072 
4073 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4074 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4075 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4076 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4077 
4078 		if (def != data)
4079 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4080 
4081 		/* 2 - disable MGLS in RLC */
4082 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4083 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4084 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4085 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4086 		}
4087 
4088 		/* 3 - disable MGLS in CP */
4089 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4090 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4091 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4092 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4093 		}
4094 	}
4095 
4096 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4097 }
4098 
4099 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4100 					   bool enable)
4101 {
4102 	uint32_t data, def;
4103 
4104 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4105 
4106 	/* Enable 3D CGCG/CGLS */
4107 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4108 		/* write cmd to clear cgcg/cgls ov */
4109 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4110 		/* unset CGCG override */
4111 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4112 		/* update CGCG and CGLS override bits */
4113 		if (def != data)
4114 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4115 
4116 		/* enable 3Dcgcg FSM(0x0000363f) */
4117 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4118 
4119 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4120 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4121 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4122 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4123 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4124 		if (def != data)
4125 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4126 
4127 		/* set IDLE_POLL_COUNT(0x00900100) */
4128 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4129 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4130 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4131 		if (def != data)
4132 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4133 	} else {
4134 		/* Disable CGCG/CGLS */
4135 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4136 		/* disable cgcg, cgls should be disabled */
4137 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4138 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4139 		/* disable cgcg and cgls in FSM */
4140 		if (def != data)
4141 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4142 	}
4143 
4144 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4145 }
4146 
4147 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4148 						      bool enable)
4149 {
4150 	uint32_t def, data;
4151 
4152 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4153 
4154 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4155 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4156 		/* unset CGCG override */
4157 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4158 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4159 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4160 		else
4161 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4162 		/* update CGCG and CGLS override bits */
4163 		if (def != data)
4164 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4165 
4166 		/* enable cgcg FSM(0x0000363F) */
4167 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4168 
4169 		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4170 			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4171 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4172 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4173 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4174 		if (def != data)
4175 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4176 
4177 		/* set IDLE_POLL_COUNT(0x00900100) */
4178 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4179 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4180 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4181 		if (def != data)
4182 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4183 	} else {
4184 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4185 		/* reset CGCG/CGLS bits */
4186 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4187 		/* disable cgcg and cgls in FSM */
4188 		if (def != data)
4189 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4190 	}
4191 
4192 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4193 }
4194 
4195 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4196 					    bool enable)
4197 {
4198 	if (enable) {
4199 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4200 		 * ===  MGCG + MGLS ===
4201 		 */
4202 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4203 		/* ===  CGCG /CGLS for GFX 3D Only === */
4204 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4205 		/* ===  CGCG + CGLS === */
4206 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4207 	} else {
4208 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4209 		 * ===  CGCG + CGLS ===
4210 		 */
4211 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4212 		/* ===  CGCG /CGLS for GFX 3D Only === */
4213 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4214 		/* ===  MGCG + MGLS === */
4215 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4216 	}
4217 	return 0;
4218 }
4219 
4220 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4221 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4222 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4223 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4224 	.init = gfx_v9_0_rlc_init,
4225 	.get_csb_size = gfx_v9_0_get_csb_size,
4226 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4227 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4228 	.resume = gfx_v9_0_rlc_resume,
4229 	.stop = gfx_v9_0_rlc_stop,
4230 	.reset = gfx_v9_0_rlc_reset,
4231 	.start = gfx_v9_0_rlc_start
4232 };
4233 
4234 static int gfx_v9_0_set_powergating_state(void *handle,
4235 					  enum amd_powergating_state state)
4236 {
4237 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4238 	bool enable = (state == AMD_PG_STATE_GATE);
4239 
4240 	switch (adev->asic_type) {
4241 	case CHIP_RAVEN:
4242 		if (!enable) {
4243 			amdgpu_gfx_off_ctrl(adev, false);
4244 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4245 		}
4246 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4247 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4248 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4249 		} else {
4250 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4251 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4252 		}
4253 
4254 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4255 			gfx_v9_0_enable_cp_power_gating(adev, true);
4256 		else
4257 			gfx_v9_0_enable_cp_power_gating(adev, false);
4258 
4259 		/* update gfx cgpg state */
4260 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4261 
4262 		/* update mgcg state */
4263 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4264 
4265 		if (enable)
4266 			amdgpu_gfx_off_ctrl(adev, true);
4267 		break;
4268 	case CHIP_VEGA12:
4269 		if (!enable) {
4270 			amdgpu_gfx_off_ctrl(adev, false);
4271 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4272 		} else {
4273 			amdgpu_gfx_off_ctrl(adev, true);
4274 		}
4275 		break;
4276 	default:
4277 		break;
4278 	}
4279 
4280 	return 0;
4281 }
4282 
4283 static int gfx_v9_0_set_clockgating_state(void *handle,
4284 					  enum amd_clockgating_state state)
4285 {
4286 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4287 
4288 	if (amdgpu_sriov_vf(adev))
4289 		return 0;
4290 
4291 	switch (adev->asic_type) {
4292 	case CHIP_VEGA10:
4293 	case CHIP_VEGA12:
4294 	case CHIP_VEGA20:
4295 	case CHIP_RAVEN:
4296 		gfx_v9_0_update_gfx_clock_gating(adev,
4297 						 state == AMD_CG_STATE_GATE);
4298 		break;
4299 	default:
4300 		break;
4301 	}
4302 	return 0;
4303 }
4304 
4305 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4306 {
4307 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4308 	int data;
4309 
4310 	if (amdgpu_sriov_vf(adev))
4311 		*flags = 0;
4312 
4313 	/* AMD_CG_SUPPORT_GFX_MGCG */
4314 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4315 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4316 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4317 
4318 	/* AMD_CG_SUPPORT_GFX_CGCG */
4319 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4320 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4321 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4322 
4323 	/* AMD_CG_SUPPORT_GFX_CGLS */
4324 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4325 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4326 
4327 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4328 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4329 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4330 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4331 
4332 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4333 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4334 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4335 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4336 
4337 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4338 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4339 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4340 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4341 
4342 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4343 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4344 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4345 }
4346 
4347 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4348 {
4349 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4350 }
4351 
4352 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4353 {
4354 	struct amdgpu_device *adev = ring->adev;
4355 	u64 wptr;
4356 
4357 	/* XXX check if swapping is necessary on BE */
4358 	if (ring->use_doorbell) {
4359 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4360 	} else {
4361 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4362 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4363 	}
4364 
4365 	return wptr;
4366 }
4367 
4368 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4369 {
4370 	struct amdgpu_device *adev = ring->adev;
4371 
4372 	if (ring->use_doorbell) {
4373 		/* XXX check if swapping is necessary on BE */
4374 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4375 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4376 	} else {
4377 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4378 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4379 	}
4380 }
4381 
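/*
 * Flush HDP from the ring: emit a WAIT_REG_MEM that, in effect, writes
 * this engine's bit into the NBIO HDP flush request register and polls
 * the done register until the bit comes back, using the PFP on gfx rings
 * and the ME on compute rings (which have no PFP).
 */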
4382 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4383 {
4384 	struct amdgpu_device *adev = ring->adev;
4385 	u32 ref_and_mask, reg_mem_engine;
4386 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4387 
4388 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4389 		switch (ring->me) {
4390 		case 1:
4391 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4392 			break;
4393 		case 2:
4394 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4395 			break;
4396 		default:
4397 			return;
4398 		}
4399 		reg_mem_engine = 0;
4400 	} else {
4401 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4402 		reg_mem_engine = 1; /* pfp */
4403 	}
4404 
4405 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4406 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4407 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4408 			      ref_and_mask, ref_and_mask, 0x20);
4409 }
4410 
4411 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4412 					struct amdgpu_job *job,
4413 					struct amdgpu_ib *ib,
4414 					uint32_t flags)
4415 {
4416 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4417 	u32 header, control = 0;
4418 
4419 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4420 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4421 	else
4422 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4423 
4424 	control |= ib->length_dw | (vmid << 24);
4425 
4426 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4427 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4428 
4429 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4430 			gfx_v9_0_ring_emit_de_meta(ring);
4431 	}
4432 
4433 	amdgpu_ring_write(ring, header);
4434 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4435 	amdgpu_ring_write(ring,
4436 #ifdef __BIG_ENDIAN
4437 		(2 << 0) |
4438 #endif
4439 		lower_32_bits(ib->gpu_addr));
4440 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4441 	amdgpu_ring_write(ring, control);
4442 }
4443 
4444 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4445 					  struct amdgpu_job *job,
4446 					  struct amdgpu_ib *ib,
4447 					  uint32_t flags)
4448 {
4449 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4450 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4451 
4452 	/* Currently there is a high probability of a wave ID mismatch
4453 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4454 	 * different wave IDs than the GDS expects. This situation happens
4455 	 * randomly when at least 5 compute pipes use GDS ordered append.
4456 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4457 	 * Those are probably bugs somewhere else in the kernel driver.
4458 	 *
4459 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4460 	 * GDS to 0 for this ring (me/pipe).
4461 	 */
4462 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4463 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4464 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4465 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4466 	}
4467 
4468 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4469 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4470 	amdgpu_ring_write(ring,
4471 #ifdef __BIG_ENDIAN
4472 				(2 << 0) |
4473 #endif
4474 				lower_32_bits(ib->gpu_addr));
4475 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4476 	amdgpu_ring_write(ring, control);
4477 }
4478 
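/*
 * Fence emission uses RELEASE_MEM; roughly: one dword of cache-flush
 * actions plus the EOP event, one DATA_SEL/INT_SEL dword, the Qword/Dword
 * aligned address, the 64-bit seq and a trailing zero. TC_WB_ONLY fences
 * request just the TC writeback actions instead of the full TCL1/TC
 * invalidate.
 */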
4479 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4480 				     u64 seq, unsigned flags)
4481 {
4482 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4483 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4484 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4485 
4486 	/* RELEASE_MEM - flush caches, send int */
4487 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4488 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4489 					       EOP_TC_NC_ACTION_EN) :
4490 					      (EOP_TCL1_ACTION_EN |
4491 					       EOP_TC_ACTION_EN |
4492 					       EOP_TC_WB_ACTION_EN |
4493 					       EOP_TC_MD_ACTION_EN)) |
4494 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4495 				 EVENT_INDEX(5)));
4496 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4497 
4498 	/*
4499 	 * the address should be Qword aligned for a 64bit write, and Dword
4500 	 * aligned when only the low 32bit data is sent (data high discarded)
4501 	 */
4502 	if (write64bit)
4503 		BUG_ON(addr & 0x7);
4504 	else
4505 		BUG_ON(addr & 0x3);
4506 	amdgpu_ring_write(ring, lower_32_bits(addr));
4507 	amdgpu_ring_write(ring, upper_32_bits(addr));
4508 	amdgpu_ring_write(ring, lower_32_bits(seq));
4509 	amdgpu_ring_write(ring, upper_32_bits(seq));
4510 	amdgpu_ring_write(ring, 0);
4511 }
4512 
4513 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4514 {
4515 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4516 	uint32_t seq = ring->fence_drv.sync_seq;
4517 	uint64_t addr = ring->fence_drv.gpu_addr;
4518 
4519 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4520 			      lower_32_bits(addr), upper_32_bits(addr),
4521 			      seq, 0xffffffff, 4);
4522 }
4523 
4524 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4525 					unsigned vmid, uint64_t pd_addr)
4526 {
4527 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4528 
4529 	/* compute doesn't have PFP */
4530 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4531 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4532 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4533 		amdgpu_ring_write(ring, 0x0);
4534 	}
4535 }
4536 
4537 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4538 {
4539 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4540 }
4541 
4542 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4543 {
4544 	u64 wptr;
4545 
4546 	/* XXX check if swapping is necessary on BE */
4547 	if (ring->use_doorbell)
4548 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4549 	else
4550 		BUG();
4551 	return wptr;
4552 }
4553 
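/*
 * Throttle a pipe by dropping its SPI_WCL_PIPE_PERCENT_* value to 1, or
 * restore it to the field's maximum; used below to starve pipes that do
 * not hold a reservation while high-priority work runs.
 */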
4554 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4555 					   bool acquire)
4556 {
4557 	struct amdgpu_device *adev = ring->adev;
4558 	int pipe_num, tmp, reg;
4559 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4560 
4561 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4562 
4563 	/* first me only has 2 entries, GFX and HP3D */
4564 	if (ring->me > 0)
4565 		pipe_num -= 2;
4566 
4567 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4568 	tmp = RREG32(reg);
4569 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4570 	WREG32(reg, tmp);
4571 }
4572 
4573 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4574 					    struct amdgpu_ring *ring,
4575 					    bool acquire)
4576 {
4577 	int i, pipe;
4578 	bool reserve;
4579 	struct amdgpu_ring *iring;
4580 
4581 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4582 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4583 	if (acquire)
4584 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4585 	else
4586 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4587 
4588 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4589 		/* Clear all reservations - everyone reacquires all resources */
4590 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4591 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4592 						       true);
4593 
4594 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4595 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4596 						       true);
4597 	} else {
4598 		/* Lower all pipes without a current reservation */
4599 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4600 			iring = &adev->gfx.gfx_ring[i];
4601 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4602 							   iring->me,
4603 							   iring->pipe,
4604 							   0);
4605 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4606 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4607 		}
4608 
4609 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4610 			iring = &adev->gfx.compute_ring[i];
4611 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4612 							   iring->me,
4613 							   iring->pipe,
4614 							   0);
4615 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4616 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4617 		}
4618 	}
4619 
4620 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4621 }
4622 
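/*
 * Raise or restore the HQD pipe/queue priority of this ring; the GRBM
 * select must point at the ring's me/pipe/queue while the CP_HQD
 * registers are written.
 */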
4623 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4624 				      struct amdgpu_ring *ring,
4625 				      bool acquire)
4626 {
4627 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4628 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4629 
4630 	mutex_lock(&adev->srbm_mutex);
4631 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4632 
4633 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4634 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4635 
4636 	soc15_grbm_select(adev, 0, 0, 0, 0);
4637 	mutex_unlock(&adev->srbm_mutex);
4638 }
4639 
4640 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4641 					       enum drm_sched_priority priority)
4642 {
4643 	struct amdgpu_device *adev = ring->adev;
4644 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4645 
4646 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4647 		return;
4648 
4649 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4650 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4651 }
4652 
4653 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4654 {
4655 	struct amdgpu_device *adev = ring->adev;
4656 
4657 	/* XXX check if swapping is necessary on BE */
4658 	if (ring->use_doorbell) {
4659 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4660 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4661 	} else {
4662 		BUG(); /* only DOORBELL method supported on gfx9 now */
4663 	}
4664 }
4665 
4666 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4667 					 u64 seq, unsigned int flags)
4668 {
4669 	struct amdgpu_device *adev = ring->adev;
4670 
4671 	/* we only allocate 32bit for each seq wb address */
4672 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4673 
4674 	/* write fence seq to the "addr" */
4675 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4676 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4677 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4678 	amdgpu_ring_write(ring, lower_32_bits(addr));
4679 	amdgpu_ring_write(ring, upper_32_bits(addr));
4680 	amdgpu_ring_write(ring, lower_32_bits(seq));
4681 
4682 	if (flags & AMDGPU_FENCE_FLAG_INT) {
4683 		/* set register to trigger INT */
4684 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4685 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4686 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4687 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4688 		amdgpu_ring_write(ring, 0);
4689 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4690 	}
4691 }
4692 
4693 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4694 {
4695 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4696 	amdgpu_ring_write(ring, 0);
4697 }
4698 
4699 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4700 {
4701 	struct v9_ce_ib_state ce_payload = {0};
4702 	uint64_t csa_addr;
4703 	int cnt;
4704 
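	/* WRITE_DATA body is 3 control/addr dwords + payload; PACKET3 count is body - 1 */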
4705 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4706 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4707 
4708 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4709 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4710 				 WRITE_DATA_DST_SEL(8) |
4711 				 WR_CONFIRM) |
4712 				 WRITE_DATA_CACHE_POLICY(0));
4713 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4714 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4715 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4716 }
4717 
4718 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4719 {
4720 	struct v9_de_ib_state de_payload = {0};
4721 	uint64_t csa_addr, gds_addr;
4722 	int cnt;
4723 
4724 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4725 	gds_addr = csa_addr + 4096;
4726 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4727 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4728 
4729 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4730 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4731 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4732 				 WRITE_DATA_DST_SEL(8) |
4733 				 WR_CONFIRM) |
4734 				 WRITE_DATA_CACHE_POLICY(0));
4735 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4736 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4737 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4738 }
4739 
4740 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4741 {
4742 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4743 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0 = frame begin, 1 = frame end */
4744 }
4745 
4746 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4747 {
4748 	uint32_t dw2 = 0;
4749 
4750 	if (amdgpu_sriov_vf(ring->adev))
4751 		gfx_v9_0_ring_emit_ce_meta(ring);
4752 
4753 	gfx_v9_0_ring_emit_tmz(ring, true);
4754 
4755 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
4756 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4757 		/* set load_global_config & load_global_uconfig */
4758 		dw2 |= 0x8001;
4759 		/* set load_cs_sh_regs */
4760 		dw2 |= 0x01000000;
4761 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
4762 		dw2 |= 0x10002;
4763 
4764 		/* set load_ce_ram if a preamble is present */
4765 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4766 			dw2 |= 0x10000000;
4767 	} else {
4768 		/* still load_ce_ram if this is the first time a preamble is
4769 		 * presented, even though no context switch happens.
4770 		 */
4771 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4772 			dw2 |= 0x10000000;
4773 	}
4774 
4775 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4776 	amdgpu_ring_write(ring, dw2);
4777 	amdgpu_ring_write(ring, 0);
4778 }
4779 
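/*
 * Emit a COND_EXEC packet whose dword count is left as a 0x55aa55aa
 * placeholder and patched later by gfx_v9_0_ring_emit_patch_cond_exec();
 * returns the ring offset of that placeholder.
 */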
4780 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4781 {
4782 	unsigned ret;

4783 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4784 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4785 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4786 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
4787 	ret = ring->wptr & ring->buf_mask;
4788 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4789 	return ret;
4790 }
4791 
4792 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4793 {
4794 	unsigned cur;

4795 	BUG_ON(offset > ring->buf_mask);
4796 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
4797 
4798 	cur = (ring->wptr & ring->buf_mask) - 1;
4799 	if (likely(cur > offset))
4800 		ring->ring[offset] = cur - offset;
4801 	else
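		/* wptr wrapped past the patch point: count to the ring end, then to cur */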
4802 		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
4803 }
4804 
4805 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4806 {
4807 	struct amdgpu_device *adev = ring->adev;
4808 
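	/* COPY_DATA from the register (src_sel 0) to memory (dst_sel 5),
	 * landing in the write-back slot reserved for SR-IOV register reads.
	 */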
4809 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4810 	amdgpu_ring_write(ring, 0 |	/* src: register*/
4811 				(5 << 8) |	/* dst: memory */
4812 				(1 << 20));	/* write confirm */
4813 	amdgpu_ring_write(ring, reg);
4814 	amdgpu_ring_write(ring, 0);
4815 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4816 				adev->virt.reg_val_offs * 4));
4817 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4818 				adev->virt.reg_val_offs * 4));
4819 }
4820 
4821 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4822 				    uint32_t val)
4823 {
4824 	uint32_t cmd = 0;
4825 
4826 	switch (ring->funcs->type) {
4827 	case AMDGPU_RING_TYPE_GFX:
4828 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4829 		break;
4830 	case AMDGPU_RING_TYPE_KIQ:
4831 		cmd = (1 << 16); /* no inc addr */
4832 		break;
4833 	default:
4834 		cmd = WR_CONFIRM;
4835 		break;
4836 	}
4837 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4838 	amdgpu_ring_write(ring, cmd);
4839 	amdgpu_ring_write(ring, reg);
4840 	amdgpu_ring_write(ring, 0);
4841 	amdgpu_ring_write(ring, val);
4842 }
4843 
4844 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4845 					uint32_t val, uint32_t mask)
4846 {
4847 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4848 }
4849 
4850 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4851 						  uint32_t reg0, uint32_t reg1,
4852 						  uint32_t ref, uint32_t mask)
4853 {
4854 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4855 	struct amdgpu_device *adev = ring->adev;
4856 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4857 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4858 
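	/* CP firmware new enough to support a fused write-then-wait can do
	 * this in one WAIT_REG_MEM; otherwise emit separate write and wait
	 * packets via the generic helper.
	 */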
4859 	if (fw_version_ok)
4860 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4861 				      ref, mask, 0x20);
4862 	else
4863 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4864 							   ref, mask);
4865 }
4866 
4867 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4868 {
4869 	struct amdgpu_device *adev = ring->adev;
4870 	uint32_t value = 0;
4871 
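	/* Build an SQ_CMD that only targets waves tagged with the guilty
	 * VMID, attempting to kill the hung waves without a full GPU reset.
	 */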
4872 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4873 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4874 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4875 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4876 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
4877 }
4878 
4879 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4880 						 enum amdgpu_interrupt_state state)
4881 {
4882 	switch (state) {
4883 	case AMDGPU_IRQ_STATE_DISABLE:
4884 	case AMDGPU_IRQ_STATE_ENABLE:
4885 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4886 			       TIME_STAMP_INT_ENABLE,
4887 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4888 		break;
4889 	default:
4890 		break;
4891 	}
4892 }
4893 
4894 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4895 						     int me, int pipe,
4896 						     enum amdgpu_interrupt_state state)
4897 {
4898 	u32 mec_int_cntl, mec_int_cntl_reg;
4899 
4900 	/*
4901 	 * amdgpu controls only the first MEC. That's why this function only
4902 	 * handles the setting of interrupts for this specific MEC. All other
4903 	 * pipes' interrupts are set by amdkfd.
4904 	 */
4905 
4906 	if (me == 1) {
4907 		switch (pipe) {
4908 		case 0:
4909 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4910 			break;
4911 		case 1:
4912 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4913 			break;
4914 		case 2:
4915 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4916 			break;
4917 		case 3:
4918 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4919 			break;
4920 		default:
4921 			DRM_DEBUG("invalid pipe %d\n", pipe);
4922 			return;
4923 		}
4924 	} else {
4925 		DRM_DEBUG("invalid me %d\n", me);
4926 		return;
4927 	}
4928 
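	/* all CP_ME1_PIPEn_INT_CNTL registers share the PIPE0 field layout */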
4929 	switch (state) {
4930 	case AMDGPU_IRQ_STATE_DISABLE:
4931 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4932 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4933 					     TIME_STAMP_INT_ENABLE, 0);
4934 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4935 		break;
4936 	case AMDGPU_IRQ_STATE_ENABLE:
4937 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4938 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4939 					     TIME_STAMP_INT_ENABLE, 1);
4940 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4941 		break;
4942 	default:
4943 		break;
4944 	}
4945 }
4946 
4947 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4948 					     struct amdgpu_irq_src *source,
4949 					     unsigned type,
4950 					     enum amdgpu_interrupt_state state)
4951 {
4952 	switch (state) {
4953 	case AMDGPU_IRQ_STATE_DISABLE:
4954 	case AMDGPU_IRQ_STATE_ENABLE:
4955 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4956 			       PRIV_REG_INT_ENABLE,
4957 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4958 		break;
4959 	default:
4960 		break;
4961 	}
4962 
4963 	return 0;
4964 }
4965 
4966 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4967 					      struct amdgpu_irq_src *source,
4968 					      unsigned type,
4969 					      enum amdgpu_interrupt_state state)
4970 {
4971 	switch (state) {
4972 	case AMDGPU_IRQ_STATE_DISABLE:
4973 	case AMDGPU_IRQ_STATE_ENABLE:
4974 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4975 			       PRIV_INSTR_INT_ENABLE,
4976 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
4977 	default:
4978 		break;
4979 	}
4980 
4981 	return 0;
4982 }
4983 
4984 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
4985 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4986 			CP_ECC_ERROR_INT_ENABLE, 1)
4987 
4988 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
4989 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4990 			CP_ECC_ERROR_INT_ENABLE, 0)
4991 
4992 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4993 					      struct amdgpu_irq_src *source,
4994 					      unsigned type,
4995 					      enum amdgpu_interrupt_state state)
4996 {
4997 	switch (state) {
4998 	case AMDGPU_IRQ_STATE_DISABLE:
4999 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5000 				CP_ECC_ERROR_INT_ENABLE, 0);
5001 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5002 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5003 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5004 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5005 		break;
5006 
5007 	case AMDGPU_IRQ_STATE_ENABLE:
5008 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5009 				CP_ECC_ERROR_INT_ENABLE, 1);
5010 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5011 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5012 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5013 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5014 		break;
5015 	default:
5016 		break;
5017 	}
5018 
5019 	return 0;
5020 }
5021 
5022 
5023 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5024 					    struct amdgpu_irq_src *src,
5025 					    unsigned type,
5026 					    enum amdgpu_interrupt_state state)
5027 {
5028 	switch (type) {
5029 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5030 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5031 		break;
5032 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5033 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5034 		break;
5035 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5036 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5037 		break;
5038 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5039 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5040 		break;
5041 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5042 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5043 		break;
5044 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5045 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5046 		break;
5047 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5048 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5049 		break;
5050 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5051 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5052 		break;
5053 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5054 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5055 		break;
5056 	default:
5057 		break;
5058 	}
5059 	return 0;
5060 }
5061 
5062 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5063 			    struct amdgpu_irq_src *source,
5064 			    struct amdgpu_iv_entry *entry)
5065 {
5066 	int i;
5067 	u8 me_id, pipe_id, queue_id;
5068 	struct amdgpu_ring *ring;
5069 
5070 	DRM_DEBUG("IH: CP EOP\n");
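	/* decode ring_id: queue in bits 6:4, me in bits 3:2, pipe in bits 1:0 */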
5071 	me_id = (entry->ring_id & 0x0c) >> 2;
5072 	pipe_id = (entry->ring_id & 0x03) >> 0;
5073 	queue_id = (entry->ring_id & 0x70) >> 4;
5074 
5075 	switch (me_id) {
5076 	case 0:
5077 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5078 		break;
5079 	case 1:
5080 	case 2:
5081 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5082 			ring = &adev->gfx.compute_ring[i];
5083 			/* Per-queue interrupt is supported for MEC starting from VI.
5084 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5085 			 */
5086 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5087 				amdgpu_fence_process(ring);
5088 		}
5089 		break;
5090 	}
5091 	return 0;
5092 }
5093 
5094 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5095 			   struct amdgpu_iv_entry *entry)
5096 {
5097 	u8 me_id, pipe_id, queue_id;
5098 	struct amdgpu_ring *ring;
5099 	int i;
5100 
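	/* decode ring_id: queue in bits 6:4, me in bits 3:2, pipe in bits 1:0 */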
5101 	me_id = (entry->ring_id & 0x0c) >> 2;
5102 	pipe_id = (entry->ring_id & 0x03) >> 0;
5103 	queue_id = (entry->ring_id & 0x70) >> 4;
5104 
5105 	switch (me_id) {
5106 	case 0:
5107 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5108 		break;
5109 	case 1:
5110 	case 2:
5111 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5112 			ring = &adev->gfx.compute_ring[i];
5113 			if (ring->me == me_id && ring->pipe == pipe_id &&
5114 			    ring->queue == queue_id)
5115 				drm_sched_fault(&ring->sched);
5116 		}
5117 		break;
5118 	}
5119 }
5120 
5121 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5122 				 struct amdgpu_irq_src *source,
5123 				 struct amdgpu_iv_entry *entry)
5124 {
5125 	DRM_ERROR("Illegal register access in command stream\n");
5126 	gfx_v9_0_fault(adev, entry);
5127 	return 0;
5128 }
5129 
5130 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5131 				  struct amdgpu_irq_src *source,
5132 				  struct amdgpu_iv_entry *entry)
5133 {
5134 	DRM_ERROR("Illegal instruction in command stream\n");
5135 	gfx_v9_0_fault(adev, entry);
5136 	return 0;
5137 }
5138 
5139 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5140 		struct amdgpu_iv_entry *entry)
5141 {
5142 	/* TODO: a UE (uncorrectable error) will trigger an interrupt. */
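	/* flag the SRAM ECC error for KFD consumers and schedule a GPU reset */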
5143 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5144 	amdgpu_ras_reset_gpu(adev, 0);
5145 	return AMDGPU_RAS_UE;
5146 }
5147 
5148 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5149 				  struct amdgpu_irq_src *source,
5150 				  struct amdgpu_iv_entry *entry)
5151 {
5152 	struct ras_common_if *ras_if = adev->gfx.ras_if;
5153 	struct ras_dispatch_if ih_data = {
5154 		.entry = entry,
5155 	};
5156 
5157 	if (!ras_if)
5158 		return 0;
5159 
5160 	ih_data.head = *ras_if;
5161 
5162 	DRM_ERROR("CP ECC ERROR IRQ\n");
5163 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5164 	return 0;
5165 }
5166 
5167 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5168 	.name = "gfx_v9_0",
5169 	.early_init = gfx_v9_0_early_init,
5170 	.late_init = gfx_v9_0_late_init,
5171 	.sw_init = gfx_v9_0_sw_init,
5172 	.sw_fini = gfx_v9_0_sw_fini,
5173 	.hw_init = gfx_v9_0_hw_init,
5174 	.hw_fini = gfx_v9_0_hw_fini,
5175 	.suspend = gfx_v9_0_suspend,
5176 	.resume = gfx_v9_0_resume,
5177 	.is_idle = gfx_v9_0_is_idle,
5178 	.wait_for_idle = gfx_v9_0_wait_for_idle,
5179 	.soft_reset = gfx_v9_0_soft_reset,
5180 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
5181 	.set_powergating_state = gfx_v9_0_set_powergating_state,
5182 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
5183 };
5184 
5185 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5186 	.type = AMDGPU_RING_TYPE_GFX,
5187 	.align_mask = 0xff,
5188 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5189 	.support_64bit_ptrs = true,
5190 	.vmhub = AMDGPU_GFXHUB,
5191 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5192 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5193 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5194 	.emit_frame_size = /* 242 dwords max in total, assuming 16 IBs */
5195 		5 +  /* COND_EXEC */
5196 		7 +  /* PIPELINE_SYNC */
5197 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5198 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5199 		2 + /* VM_FLUSH */
5200 		8 +  /* FENCE for VM_FLUSH */
5201 		20 + /* GDS switch */
5202 		4 + /* double SWITCH_BUFFER,
5203 		     * the first COND_EXEC jumps to the place just
5204 		     * prior to this double SWITCH_BUFFER */
5205 		5 + /* COND_EXEC */
5206 		7 + /* HDP_flush */
5207 		4 + /* VGT_flush */
5208 		14 + /* CE_META */
5209 		31 + /* DE_META */
5210 		3 + /* CNTX_CTRL */
5211 		5 + /* HDP_INVL */
5212 		8 + 8 + /* FENCE x2 */
5213 		2, /* SWITCH_BUFFER */
5214 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
5215 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5216 	.emit_fence = gfx_v9_0_ring_emit_fence,
5217 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5218 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5219 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5220 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5221 	.test_ring = gfx_v9_0_ring_test_ring,
5222 	.test_ib = gfx_v9_0_ring_test_ib,
5223 	.insert_nop = amdgpu_ring_insert_nop,
5224 	.pad_ib = amdgpu_ring_generic_pad_ib,
5225 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
5226 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5227 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5228 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5229 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
5230 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5231 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5232 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5233 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
5234 };
5235 
5236 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5237 	.type = AMDGPU_RING_TYPE_COMPUTE,
5238 	.align_mask = 0xff,
5239 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5240 	.support_64bit_ptrs = true,
5241 	.vmhub = AMDGPU_GFXHUB,
5242 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5243 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5244 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5245 	.emit_frame_size =
5246 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5247 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5248 		5 + /* hdp invalidate */
5249 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5250 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5251 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5252 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5253 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5254 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5255 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
5256 	.emit_fence = gfx_v9_0_ring_emit_fence,
5257 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5258 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5259 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5260 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5261 	.test_ring = gfx_v9_0_ring_test_ring,
5262 	.test_ib = gfx_v9_0_ring_test_ib,
5263 	.insert_nop = amdgpu_ring_insert_nop,
5264 	.pad_ib = amdgpu_ring_generic_pad_ib,
5265 	.set_priority = gfx_v9_0_ring_set_priority_compute,
5266 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5267 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5268 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5269 };
5270 
5271 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5272 	.type = AMDGPU_RING_TYPE_KIQ,
5273 	.align_mask = 0xff,
5274 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5275 	.support_64bit_ptrs = true,
5276 	.vmhub = AMDGPU_GFXHUB,
5277 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5278 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5279 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5280 	.emit_frame_size =
5281 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5282 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5283 		5 + /* hdp invalidate */
5284 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5285 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5286 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5287 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5288 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5289 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5290 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5291 	.test_ring = gfx_v9_0_ring_test_ring,
5292 	.insert_nop = amdgpu_ring_insert_nop,
5293 	.pad_ib = amdgpu_ring_generic_pad_ib,
5294 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
5295 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5296 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5297 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5298 };
5299 
5300 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5301 {
5302 	int i;
5303 
5304 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5305 
5306 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5307 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5308 
5309 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5310 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5311 }
5312 
5313 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5314 	.set = gfx_v9_0_set_eop_interrupt_state,
5315 	.process = gfx_v9_0_eop_irq,
5316 };
5317 
5318 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5319 	.set = gfx_v9_0_set_priv_reg_fault_state,
5320 	.process = gfx_v9_0_priv_reg_irq,
5321 };
5322 
5323 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5324 	.set = gfx_v9_0_set_priv_inst_fault_state,
5325 	.process = gfx_v9_0_priv_inst_irq,
5326 };
5327 
5328 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5329 	.set = gfx_v9_0_set_cp_ecc_error_state,
5330 	.process = gfx_v9_0_cp_ecc_error_irq,
5331 };
5332 
5333 
5334 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5335 {
5336 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5337 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5338 
5339 	adev->gfx.priv_reg_irq.num_types = 1;
5340 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5341 
5342 	adev->gfx.priv_inst_irq.num_types = 1;
5343 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5344 
5345 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5346 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5347 }
5348 
5349 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5350 {
5351 	switch (adev->asic_type) {
5352 	case CHIP_VEGA10:
5353 	case CHIP_VEGA12:
5354 	case CHIP_VEGA20:
5355 	case CHIP_RAVEN:
5356 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5357 		break;
5358 	default:
5359 		break;
5360 	}
5361 }
5362 
5363 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5364 {
5365 	/* init asic gds info */
5366 	switch (adev->asic_type) {
5367 	case CHIP_VEGA10:
5368 	case CHIP_VEGA12:
5369 	case CHIP_VEGA20:
5370 		adev->gds.gds_size = 0x10000;
5371 		break;
5372 	case CHIP_RAVEN:
5373 		adev->gds.gds_size = 0x1000;
5374 		break;
5375 	default:
5376 		adev->gds.gds_size = 0x10000;
5377 		break;
5378 	}
5379 
5380 	switch (adev->asic_type) {
5381 	case CHIP_VEGA10:
5382 	case CHIP_VEGA20:
5383 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5384 		break;
5385 	case CHIP_VEGA12:
5386 		adev->gds.gds_compute_max_wave_id = 0x27f;
5387 		break;
5388 	case CHIP_RAVEN:
5389 		if (adev->rev_id >= 0x8)
5390 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5391 		else
5392 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5393 		break;
5394 	default:
5395 		/* this really depends on the chip */
5396 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5397 		break;
5398 	}
5399 
5400 	adev->gds.gws_size = 64;
5401 	adev->gds.oa_size = 16;
5402 }
5403 
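/*
 * Apply a user-requested CU disable mask (from the amdgpu "disable_cu"
 * module parameter) to the currently selected shader array.
 */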
5404 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5405 						 u32 bitmap)
5406 {
5407 	u32 data;
5408 
5409 	if (!bitmap)
5410 		return;
5411 
5412 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5413 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5414 
5415 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5416 }
5417 
5418 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5419 {
5420 	u32 data, mask;
5421 
5422 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5423 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5424 
5425 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5426 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5427 
5428 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5429 
5430 	return (~data) & mask;
5431 }
5432 
5433 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5434 				 struct amdgpu_cu_info *cu_info)
5435 {
5436 	int i, j, k, counter, active_cu_number = 0;
5437 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5438 	unsigned disable_masks[4 * 2];
5439 
5440 	if (!adev || !cu_info)
5441 		return -EINVAL;
5442 
5443 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5444 
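	/* Walk every shader engine / shader array, record each active-CU
	 * bitmap, and derive the always-on (AO) CU mask from it.
	 */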
5445 	mutex_lock(&adev->grbm_idx_mutex);
5446 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5447 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5448 			mask = 1;
5449 			ao_bitmap = 0;
5450 			counter = 0;
5451 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5452 			if (i < 4 && j < 2)
5453 				gfx_v9_0_set_user_cu_inactive_bitmap(
5454 					adev, disable_masks[i * 2 + j]);
5455 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5456 			cu_info->bitmap[i][j] = bitmap;
5457 
5458 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5459 				if (bitmap & mask) {
5460 					if (counter < adev->gfx.config.max_cu_per_sh)
5461 						ao_bitmap |= mask;
5462 					counter++;
5463 				}
5464 				mask <<= 1;
5465 			}
5466 			active_cu_number += counter;
5467 			if (i < 2 && j < 2)
5468 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5469 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5470 		}
5471 	}
5472 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5473 	mutex_unlock(&adev->grbm_idx_mutex);
5474 
5475 	cu_info->number = active_cu_number;
5476 	cu_info->ao_cu_mask = ao_cu_mask;
5477 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5478 
5479 	return 0;
5480 }
5481 
5482 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5483 {
5484 	.type = AMD_IP_BLOCK_TYPE_GFX,
5485 	.major = 9,
5486 	.minor = 0,
5487 	.rev = 0,
5488 	.funcs = &gfx_v9_0_ip_funcs,
5489 };
5490