xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision d40605b6d088b20827e442903022c65f0f165c84)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69 
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76 
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83 
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90 
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98 
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106 
107 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
108 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
110 
111 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
112 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
113 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
114 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
115 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
116 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
117 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
118 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
119 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
120 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
121 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
122 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
123 
124 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
125 {
126 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
127 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
128 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
129 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
130 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
131 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
132 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
133 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
134 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
135 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
136 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
137 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
138 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
139 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
140 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
141 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
142 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
143 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
144 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
145 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
146 };
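/*
 * Each soc15_reg_golden entry above is consumed by
 * soc15_program_register_sequence(), which at this revision does roughly
 * the following read-modify-write per register:
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~entry->and_mask;   // clear the masked field
 *	tmp |= entry->or_mask;     // program the golden value
 *	WREG32(reg, tmp);
 *
 * i.e. the third column selects the bits to update and the fourth supplies
 * their new value.
 */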
147 
148 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
149 {
150 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
151 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
152 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
153 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
154 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
155 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
156 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
157 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
158 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
159 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
160 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
161 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
162 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
163 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
164 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
165 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
166 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
167 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
168 };
169 
170 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
171 {
172 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
173 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
174 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
175 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
176 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
177 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
178 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
179 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
180 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
181 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
182 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
183 };
184 
185 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
186 {
187 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
188 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
189 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
190 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
191 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
192 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
193 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
194 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
195 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
196 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
197 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
198 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
199 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
200 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
201 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
202 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
203 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
204 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
205 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
206 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
207 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
208 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
209 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
210 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
211 };
212 
213 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
214 {
215 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
216 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
217 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
218 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
219 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
220 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
221 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
222 };
223 
224 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
225 {
226 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
227 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
228 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
229 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
230 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
231 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
232 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
233 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
234 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
235 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
236 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
237 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
238 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
239 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
240 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
241 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
242 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
243 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
244 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
245 };
246 
247 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
248 {
249 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
250 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
251 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
252 };
253 
254 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
255 {
256 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
257 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
258 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
259 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
260 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
261 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
262 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
263 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
264 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
265 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
266 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
267 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
268 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
269 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
270 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
271 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
272 };
273 
274 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
275 {
276 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
277 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
278 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
279 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
280 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
281 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
282 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
283 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
284 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
285 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
286 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
287 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
288 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
289 };
290 
291 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
292 {
293 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
294 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
295 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
296 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
297 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
298 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
299 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
300 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
301 };
302 
303 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
304 {
305 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
306 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
307 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
308 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
309 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
310 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
311 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
312 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
313 };
314 
315 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
316 {
317 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
318 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
319 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
320 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
321 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
322 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
323 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
324 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
325 };
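/*
 * The two tables above encode each RLC_SRM_INDEX_CNTL_ADDR/DATA_n register
 * as an offset from instance 0, so pair i can be programmed with something
 * like (illustrative, not a call site in this section):
 *
 *	WREG32_SOC15_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0,
 *			    GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], addr);
 */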
326 
327 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
328 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
329 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
330 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
331 
332 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
333 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
334 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
335 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
336 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
337                                  struct amdgpu_cu_info *cu_info);
338 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
339 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
340 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
341 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
342 
343 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
344 {
345 	switch (adev->asic_type) {
346 	case CHIP_VEGA10:
347 		if (!amdgpu_virt_support_skip_setting(adev)) {
348 			soc15_program_register_sequence(adev,
349 							 golden_settings_gc_9_0,
350 							 ARRAY_SIZE(golden_settings_gc_9_0));
351 			soc15_program_register_sequence(adev,
352 							 golden_settings_gc_9_0_vg10,
353 							 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
354 		}
355 		break;
356 	case CHIP_VEGA12:
357 		soc15_program_register_sequence(adev,
358 						golden_settings_gc_9_2_1,
359 						ARRAY_SIZE(golden_settings_gc_9_2_1));
360 		soc15_program_register_sequence(adev,
361 						golden_settings_gc_9_2_1_vg12,
362 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
363 		break;
364 	case CHIP_VEGA20:
365 		soc15_program_register_sequence(adev,
366 						golden_settings_gc_9_0,
367 						ARRAY_SIZE(golden_settings_gc_9_0));
368 		soc15_program_register_sequence(adev,
369 						golden_settings_gc_9_0_vg20,
370 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
371 		break;
372 	case CHIP_ARCTURUS:
373 		soc15_program_register_sequence(adev,
374 						golden_settings_gc_9_4_1_arct,
375 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
376 		break;
377 	case CHIP_RAVEN:
378 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
379 						ARRAY_SIZE(golden_settings_gc_9_1));
380 		if (adev->rev_id >= 8)
381 			soc15_program_register_sequence(adev,
382 							golden_settings_gc_9_1_rv2,
383 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
384 		else
385 			soc15_program_register_sequence(adev,
386 							golden_settings_gc_9_1_rv1,
387 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
388 		break;
389 	default:
390 		break;
391 	}
392 
393 	if (adev->asic_type != CHIP_ARCTURUS)
394 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
395 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
396 }
397 
398 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
399 {
400 	adev->gfx.scratch.num_reg = 8;
401 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
402 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
403 }
404 
405 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
406 				       bool wc, uint32_t reg, uint32_t val)
407 {
408 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
409 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
410 				WRITE_DATA_DST_SEL(0) |
411 				(wc ? WR_CONFIRM : 0));
412 	amdgpu_ring_write(ring, reg);
413 	amdgpu_ring_write(ring, 0);
414 	amdgpu_ring_write(ring, val);
415 }
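/*
 * The five dwords above form a PM4 WRITE_DATA packet: dst_sel 0 targets a
 * register, the third dword carries the register offset, the fourth is the
 * (unused here) high address, and wc requests write-confirm so the CP waits
 * for the write to land.  Illustrative use:
 *
 *	gfx_v9_0_write_data_to_reg(ring, 0, true,
 *				   SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), value);
 */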
416 
417 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
418 				  int mem_space, int opt, uint32_t addr0,
419 				  uint32_t addr1, uint32_t ref, uint32_t mask,
420 				  uint32_t inv)
421 {
422 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
423 	amdgpu_ring_write(ring,
424 				 /* memory (1) or register (0) */
425 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
426 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
427 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
428 				 WAIT_REG_MEM_ENGINE(eng_sel)));
429 
430 	if (mem_space)
431 		BUG_ON(addr0 & 0x3); /* Dword align */
432 	amdgpu_ring_write(ring, addr0);
433 	amdgpu_ring_write(ring, addr1);
434 	amdgpu_ring_write(ring, ref);
435 	amdgpu_ring_write(ring, mask);
436 	amdgpu_ring_write(ring, inv); /* poll interval */
437 }
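/*
 * WAIT_REG_MEM stalls the selected engine until (*addr & mask) compares
 * true against ref; function 3 hard-codes "equal" here.  mem_space picks
 * memory (1, addr0/addr1 form a 64-bit GPU address) or a register (0,
 * addr0 is the register offset), and inv appears to be the poll interval
 * between retries.
 */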
438 
439 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
440 {
441 	struct amdgpu_device *adev = ring->adev;
442 	uint32_t scratch;
443 	uint32_t tmp = 0;
444 	unsigned i;
445 	int r;
446 
447 	r = amdgpu_gfx_scratch_get(adev, &scratch);
448 	if (r)
449 		return r;
450 
451 	WREG32(scratch, 0xCAFEDEAD);
452 	r = amdgpu_ring_alloc(ring, 3);
453 	if (r)
454 		goto error_free_scratch;
455 
456 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
457 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
458 	amdgpu_ring_write(ring, 0xDEADBEEF);
459 	amdgpu_ring_commit(ring);
460 
461 	for (i = 0; i < adev->usec_timeout; i++) {
462 		tmp = RREG32(scratch);
463 		if (tmp == 0xDEADBEEF)
464 			break;
465 		udelay(1);
466 	}
467 
468 	if (i >= adev->usec_timeout)
469 		r = -ETIMEDOUT;
470 
471 error_free_scratch:
472 	amdgpu_gfx_scratch_free(adev, scratch);
473 	return r;
474 }
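/*
 * The ring test seeds a scratch register with 0xCAFEDEAD from the CPU,
 * asks the CP to overwrite it with 0xDEADBEEF via SET_UCONFIG_REG (the
 * scratch registers live in the UCONFIG range, hence the
 * PACKET3_SET_UCONFIG_REG_START bias), then polls for up to usec_timeout
 * microseconds, proving the ring can fetch and execute packets.
 */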
475 
476 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
477 {
478 	struct amdgpu_device *adev = ring->adev;
479 	struct amdgpu_ib ib;
480 	struct dma_fence *f = NULL;
482 	unsigned index;
483 	uint64_t gpu_addr;
484 	uint32_t tmp;
485 	long r;
486 
487 	r = amdgpu_device_wb_get(adev, &index);
488 	if (r)
489 		return r;
490 
491 	gpu_addr = adev->wb.gpu_addr + (index * 4);
492 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
493 	memset(&ib, 0, sizeof(ib));
494 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
495 	if (r)
496 		goto err1;
497 
498 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
499 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
500 	ib.ptr[2] = lower_32_bits(gpu_addr);
501 	ib.ptr[3] = upper_32_bits(gpu_addr);
502 	ib.ptr[4] = 0xDEADBEEF;
503 	ib.length_dw = 5;
504 
505 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
506 	if (r)
507 		goto err2;
508 
509 	r = dma_fence_wait_timeout(f, false, timeout);
510 	if (r == 0) {
511 		r = -ETIMEDOUT;
512 		goto err2;
513 	} else if (r < 0) {
514 		goto err2;
515 	}
516 
517 	tmp = adev->wb.wb[index];
518 	if (tmp == 0xDEADBEEF)
519 		r = 0;
520 	else
521 		r = -EINVAL;
522 
523 err2:
524 	amdgpu_ib_free(adev, &ib, NULL);
525 	dma_fence_put(f);
526 err1:
527 	amdgpu_device_wb_free(adev, index);
528 	return r;
529 }
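/*
 * The IB test is the same idea one level up: a five-dword WRITE_DATA
 * packet (dst_sel 5 = memory) placed in an indirect buffer stores
 * 0xDEADBEEF into a writeback slot, and the fence wait proves IB fetch,
 * execution and the memory path all work.
 */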
530 
531 
532 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
533 {
534 	release_firmware(adev->gfx.pfp_fw);
535 	adev->gfx.pfp_fw = NULL;
536 	release_firmware(adev->gfx.me_fw);
537 	adev->gfx.me_fw = NULL;
538 	release_firmware(adev->gfx.ce_fw);
539 	adev->gfx.ce_fw = NULL;
540 	release_firmware(adev->gfx.rlc_fw);
541 	adev->gfx.rlc_fw = NULL;
542 	release_firmware(adev->gfx.mec_fw);
543 	adev->gfx.mec_fw = NULL;
544 	release_firmware(adev->gfx.mec2_fw);
545 	adev->gfx.mec2_fw = NULL;
546 
547 	kfree(adev->gfx.rlc.register_list_format);
548 }
549 
550 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
551 {
552 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
553 
554 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
555 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
556 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
557 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
558 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
559 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
560 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
561 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
562 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
563 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
564 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
565 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
566 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
567 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
568 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
569 }
570 
571 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
572 {
573 	adev->gfx.me_fw_write_wait = false;
574 	adev->gfx.mec_fw_write_wait = false;
575 
576 	switch (adev->asic_type) {
577 	case CHIP_VEGA10:
578 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
579 		    (adev->gfx.me_feature_version >= 42) &&
580 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
581 		    (adev->gfx.pfp_feature_version >= 42))
582 			adev->gfx.me_fw_write_wait = true;
583 
584 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
585 		    (adev->gfx.mec_feature_version >= 42))
586 			adev->gfx.mec_fw_write_wait = true;
587 		break;
588 	case CHIP_VEGA12:
589 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
590 		    (adev->gfx.me_feature_version >= 44) &&
591 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
592 		    (adev->gfx.pfp_feature_version >= 44))
593 			adev->gfx.me_fw_write_wait = true;
594 
595 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
596 		    (adev->gfx.mec_feature_version >= 44))
597 			adev->gfx.mec_fw_write_wait = true;
598 		break;
599 	case CHIP_VEGA20:
600 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
601 		    (adev->gfx.me_feature_version >= 44) &&
602 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
603 		    (adev->gfx.pfp_feature_version >= 44))
604 			adev->gfx.me_fw_write_wait = true;
605 
606 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
607 		    (adev->gfx.mec_feature_version >= 44))
608 			adev->gfx.mec_fw_write_wait = true;
609 		break;
610 	case CHIP_RAVEN:
611 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
612 		    (adev->gfx.me_feature_version >= 42) &&
613 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
614 		    (adev->gfx.pfp_feature_version >= 42))
615 			adev->gfx.me_fw_write_wait = true;
616 
617 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
618 		    (adev->gfx.mec_feature_version >= 42))
619 			adev->gfx.mec_fw_write_wait = true;
620 		break;
621 	default:
622 		break;
623 	}
624 }
625 
626 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
627 {
628 	switch (adev->asic_type) {
629 	case CHIP_VEGA10:
630 	case CHIP_VEGA12:
631 	case CHIP_VEGA20:
632 		break;
633 	case CHIP_RAVEN:
634 		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
635 			break;
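		/*
		 * The checks below read as a GFXOFF firmware allow-list for
		 * early Raven: RLC ucode 106, or anything >= 531 except the
		 * known-bad 53815 build, with feature version >= 1 and an
		 * RLC v2.1 header; anything else keeps GFXOFF disabled.
		 */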
636 		if ((adev->gfx.rlc_fw_version != 106 &&
637 		     adev->gfx.rlc_fw_version < 531) ||
638 		    (adev->gfx.rlc_fw_version == 53815) ||
639 		    (adev->gfx.rlc_feature_version < 1) ||
640 		    !adev->gfx.rlc.is_rlc_v2_1)
641 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
642 		break;
643 	default:
644 		break;
645 	}
646 }
647 
648 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
649 					  const char *chip_name)
650 {
651 	char fw_name[30];
652 	int err;
653 	struct amdgpu_firmware_info *info = NULL;
654 	const struct common_firmware_header *header = NULL;
655 	const struct gfx_firmware_header_v1_0 *cp_hdr;
656 
657 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
658 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
659 	if (err)
660 		goto out;
661 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
662 	if (err)
663 		goto out;
664 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
665 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
666 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
667 
668 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
669 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
670 	if (err)
671 		goto out;
672 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
673 	if (err)
674 		goto out;
675 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
676 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
677 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
678 
679 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
680 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
681 	if (err)
682 		goto out;
683 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
684 	if (err)
685 		goto out;
686 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
687 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
688 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
689 
690 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
691 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
692 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
693 		info->fw = adev->gfx.pfp_fw;
694 		header = (const struct common_firmware_header *)info->fw->data;
695 		adev->firmware.fw_size +=
696 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
697 
698 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
699 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
700 		info->fw = adev->gfx.me_fw;
701 		header = (const struct common_firmware_header *)info->fw->data;
702 		adev->firmware.fw_size +=
703 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
704 
705 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
706 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
707 		info->fw = adev->gfx.ce_fw;
708 		header = (const struct common_firmware_header *)info->fw->data;
709 		adev->firmware.fw_size +=
710 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
711 	}
712 
713 out:
714 	if (err) {
715 		dev_err(adev->dev,
716 			"gfx9: Failed to load firmware \"%s\"\n",
717 			fw_name);
718 		release_firmware(adev->gfx.pfp_fw);
719 		adev->gfx.pfp_fw = NULL;
720 		release_firmware(adev->gfx.me_fw);
721 		adev->gfx.me_fw = NULL;
722 		release_firmware(adev->gfx.ce_fw);
723 		adev->gfx.ce_fw = NULL;
724 	}
725 	return err;
726 }
727 
728 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
729 					  const char *chip_name)
730 {
731 	char fw_name[30];
732 	int err;
733 	struct amdgpu_firmware_info *info = NULL;
734 	const struct common_firmware_header *header = NULL;
735 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
736 	unsigned int *tmp = NULL;
737 	unsigned int i = 0;
738 	uint16_t version_major;
739 	uint16_t version_minor;
740 	uint32_t smu_version;
741 
	/*
	 * For Picasso on an AM4 socket board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise it is PCO FP5
	 */
750 	if (!strcmp(chip_name, "picasso") &&
751 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
752 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
753 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
754 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
755 		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the
		 * SMU version directly.
		 */
759 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
760 	else
761 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
762 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
763 	if (err)
764 		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
766 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
767 
768 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
769 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
770 	if (version_major == 2 && version_minor == 1)
771 		adev->gfx.rlc.is_rlc_v2_1 = true;
772 
773 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
774 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
775 	adev->gfx.rlc.save_and_restore_offset =
776 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
777 	adev->gfx.rlc.clear_state_descriptor_offset =
778 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
779 	adev->gfx.rlc.avail_scratch_ram_locations =
780 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
781 	adev->gfx.rlc.reg_restore_list_size =
782 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
783 	adev->gfx.rlc.reg_list_format_start =
784 			le32_to_cpu(rlc_hdr->reg_list_format_start);
785 	adev->gfx.rlc.reg_list_format_separate_start =
786 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
787 	adev->gfx.rlc.starting_offsets_start =
788 			le32_to_cpu(rlc_hdr->starting_offsets_start);
789 	adev->gfx.rlc.reg_list_format_size_bytes =
790 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
791 	adev->gfx.rlc.reg_list_size_bytes =
792 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
793 	adev->gfx.rlc.register_list_format =
794 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
795 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
796 	if (!adev->gfx.rlc.register_list_format) {
797 		err = -ENOMEM;
798 		goto out;
799 	}
800 
801 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
802 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
805 
806 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
807 
808 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
809 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
811 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
812 
813 	if (adev->gfx.rlc.is_rlc_v2_1)
814 		gfx_v9_0_init_rlc_ext_microcode(adev);
815 
816 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
817 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
818 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
819 		info->fw = adev->gfx.rlc_fw;
820 		header = (const struct common_firmware_header *)info->fw->data;
821 		adev->firmware.fw_size +=
822 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
823 
824 		if (adev->gfx.rlc.is_rlc_v2_1 &&
825 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
826 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
827 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
828 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
829 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
830 			info->fw = adev->gfx.rlc_fw;
831 			adev->firmware.fw_size +=
832 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
833 
834 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
835 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
836 			info->fw = adev->gfx.rlc_fw;
837 			adev->firmware.fw_size +=
838 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
839 
840 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
841 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
842 			info->fw = adev->gfx.rlc_fw;
843 			adev->firmware.fw_size +=
844 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
845 		}
846 	}
847 
848 out:
849 	if (err) {
850 		dev_err(adev->dev,
851 			"gfx9: Failed to load firmware \"%s\"\n",
852 			fw_name);
853 		release_firmware(adev->gfx.rlc_fw);
854 		adev->gfx.rlc_fw = NULL;
855 	}
856 	return err;
857 }
858 
859 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
860 					  const char *chip_name)
861 {
862 	char fw_name[30];
863 	int err;
864 	struct amdgpu_firmware_info *info = NULL;
865 	const struct common_firmware_header *header = NULL;
866 	const struct gfx_firmware_header_v1_0 *cp_hdr;
867 
868 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
869 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
870 	if (err)
871 		goto out;
872 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
873 	if (err)
874 		goto out;
875 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
876 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
877 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
878 
880 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
881 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
882 	if (!err) {
883 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
884 		if (err)
885 			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
892 	} else {
893 		err = 0;
894 		adev->gfx.mec2_fw = NULL;
895 	}
896 
897 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
898 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
899 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
900 		info->fw = adev->gfx.mec_fw;
901 		header = (const struct common_firmware_header *)info->fw->data;
902 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
903 		adev->firmware.fw_size +=
904 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
905 
906 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
907 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
908 		info->fw = adev->gfx.mec_fw;
909 		adev->firmware.fw_size +=
910 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
911 
912 		if (adev->gfx.mec2_fw) {
913 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
914 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
915 			info->fw = adev->gfx.mec2_fw;
916 			header = (const struct common_firmware_header *)info->fw->data;
917 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
918 			adev->firmware.fw_size +=
919 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
920 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
921 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
922 			info->fw = adev->gfx.mec2_fw;
923 			adev->firmware.fw_size +=
924 				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
925 		}
926 	}
927 
928 out:
929 	gfx_v9_0_check_if_need_gfxoff(adev);
930 	gfx_v9_0_check_fw_write_wait(adev);
931 	if (err) {
932 		dev_err(adev->dev,
933 			"gfx9: Failed to load firmware \"%s\"\n",
934 			fw_name);
935 		release_firmware(adev->gfx.mec_fw);
936 		adev->gfx.mec_fw = NULL;
937 		release_firmware(adev->gfx.mec2_fw);
938 		adev->gfx.mec2_fw = NULL;
939 	}
940 	return err;
941 }
942 
943 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
944 {
945 	const char *chip_name;
946 	int r;
947 
948 	DRM_DEBUG("\n");
949 
950 	switch (adev->asic_type) {
951 	case CHIP_VEGA10:
952 		chip_name = "vega10";
953 		break;
954 	case CHIP_VEGA12:
955 		chip_name = "vega12";
956 		break;
957 	case CHIP_VEGA20:
958 		chip_name = "vega20";
959 		break;
960 	case CHIP_RAVEN:
961 		if (adev->rev_id >= 8)
962 			chip_name = "raven2";
963 		else if (adev->pdev->device == 0x15d8)
964 			chip_name = "picasso";
965 		else
966 			chip_name = "raven";
967 		break;
969 	case CHIP_ARCTURUS:
970 		chip_name = "arcturus";
971 		break;
972 	default:
973 		BUG();
974 	}
975 
976 	/* No CPG in Arcturus */
977 	if (adev->asic_type != CHIP_ARCTURUS) {
978 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
979 		if (r)
980 			return r;
981 	}
982 
983 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
984 	if (r)
985 		return r;
986 
987 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
988 	if (r)
989 		return r;
990 
991 	return r;
992 }
993 
994 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
995 {
996 	u32 count = 0;
997 	const struct cs_section_def *sect = NULL;
998 	const struct cs_extent_def *ext = NULL;
999 
1000 	/* begin clear state */
1001 	count += 2;
1002 	/* context control state */
1003 	count += 3;
1004 
1005 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1006 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1007 			if (sect->id == SECT_CONTEXT)
1008 				count += 2 + ext->reg_count;
1009 			else
1010 				return 0;
1011 		}
1012 	}
1013 
1014 	/* end clear state */
1015 	count += 2;
1016 	/* clear state */
1017 	count += 2;
1018 
1019 	return count;
1020 }
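/*
 * The count above mirrors what gfx_v9_0_get_csb_buffer() emits, i.e. in
 * dwords:
 *
 *	size = 2			// PREAMBLE: begin clear state
 *	     + 3			// CONTEXT_CONTROL
 *	     + sum(2 + reg_count)	// SET_CONTEXT_REG per extent
 *	     + 2			// PREAMBLE: end clear state
 *	     + 2;			// CLEAR_STATE
 */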
1021 
1022 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1023 				    volatile u32 *buffer)
1024 {
1025 	u32 count = 0, i;
1026 	const struct cs_section_def *sect = NULL;
1027 	const struct cs_extent_def *ext = NULL;
1028 
1029 	if (adev->gfx.rlc.cs_data == NULL)
1030 		return;
1031 	if (buffer == NULL)
1032 		return;
1033 
1034 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1035 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1036 
1037 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1038 	buffer[count++] = cpu_to_le32(0x80000000);
1039 	buffer[count++] = cpu_to_le32(0x80000000);
1040 
1041 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1042 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1043 			if (sect->id == SECT_CONTEXT) {
1044 				buffer[count++] =
1045 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1046 				buffer[count++] = cpu_to_le32(ext->reg_index -
1047 						PACKET3_SET_CONTEXT_REG_START);
1048 				for (i = 0; i < ext->reg_count; i++)
1049 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1050 			} else {
1051 				return;
1052 			}
1053 		}
1054 	}
1055 
1056 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1057 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1058 
1059 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1060 	buffer[count++] = cpu_to_le32(0);
1061 }
1062 
1063 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1064 {
1065 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1066 	uint32_t pg_always_on_cu_num = 2;
1067 	uint32_t always_on_cu_num;
1068 	uint32_t i, j, k;
1069 	uint32_t mask, cu_bitmap, counter;
1070 
1071 	if (adev->flags & AMD_IS_APU)
1072 		always_on_cu_num = 4;
1073 	else if (adev->asic_type == CHIP_VEGA12)
1074 		always_on_cu_num = 8;
1075 	else
1076 		always_on_cu_num = 12;
1077 
1078 	mutex_lock(&adev->grbm_idx_mutex);
1079 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1080 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1081 			mask = 1;
1082 			cu_bitmap = 0;
1083 			counter = 0;
1084 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1085 
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1087 				if (cu_info->bitmap[i][j] & mask) {
1088 					if (counter == pg_always_on_cu_num)
1089 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1090 					if (counter < always_on_cu_num)
1091 						cu_bitmap |= mask;
1092 					else
1093 						break;
1094 					counter++;
1095 				}
1096 				mask <<= 1;
1097 			}
1098 
1099 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1100 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1101 		}
1102 	}
1103 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1104 	mutex_unlock(&adev->grbm_idx_mutex);
1105 }
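/*
 * Per SE/SH, the first always_on_cu_num active CUs found in the bitmap
 * become the always-active mask; the partial mask holding the first
 * pg_always_on_cu_num (2) of them is written to RLC_PG_ALWAYS_ON_CU_MASK
 * when the third active CU is encountered.
 */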
1106 
1107 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1108 {
1109 	uint32_t data;
1110 
1111 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1112 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1113 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1114 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1115 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1116 
1117 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1118 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1119 
1120 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1121 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1122 
1123 	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1125 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1126 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1127 
1128 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1129 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1130 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1131 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1132 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1133 
1134 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1135 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1136 	data &= 0x0000FFFF;
1137 	data |= 0x00C00000;
1138 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1139 
1140 	/*
1141 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1142 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1143 	 */
1144 
	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
	 * but used for RLC_LB_CNTL configuration */
1147 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1148 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1149 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1150 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1151 	mutex_unlock(&adev->grbm_idx_mutex);
1152 
1153 	gfx_v9_0_init_always_on_cu_mask(adev);
1154 }
1155 
1156 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1157 {
1158 	uint32_t data;
1159 
1160 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1161 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1162 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1163 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1164 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1165 
1166 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1167 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1168 
	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1170 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1171 
1172 	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1174 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1175 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1176 
1177 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1178 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1179 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1180 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1181 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1182 
1183 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1184 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1185 	data &= 0x0000FFFF;
1186 	data |= 0x00C00000;
1187 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1188 
1189 	/*
1190 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1191 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1192 	 */
1193 
	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
	 * but used for RLC_LB_CNTL configuration */
1196 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1197 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1198 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1199 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1200 	mutex_unlock(&adev->grbm_idx_mutex);
1201 
1202 	gfx_v9_0_init_always_on_cu_mask(adev);
1203 }
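/*
 * gfx_v9_0_init_lbpw() (Raven) and gfx_v9_4_init_lbpw() (Vega20) differ
 * only in tuning, i.e. the RLC_LB_THR_CONFIG thresholds and
 * RLC_LB_CNTR_MAX; the RLC_LB_PARAMS, RLC_GPM_GENERAL_7 and RLC_LB_CNTL
 * programming is identical.
 */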
1204 
1205 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1206 {
1207 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1208 }
1209 
1210 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1211 {
1212 	return 5;
1213 }
1214 
1215 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1216 {
1217 	const struct cs_section_def *cs_data;
1218 	int r;
1219 
1220 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1221 
1222 	cs_data = adev->gfx.rlc.cs_data;
1223 
1224 	if (cs_data) {
1225 		/* init clear state block */
1226 		r = amdgpu_gfx_rlc_init_csb(adev);
1227 		if (r)
1228 			return r;
1229 	}
1230 
1231 	if (adev->asic_type == CHIP_RAVEN) {
1232 		/* TODO: double check the cp_table_size for RV */
1233 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1234 		r = amdgpu_gfx_rlc_init_cpt(adev);
1235 		if (r)
1236 			return r;
1237 	}
1238 
1239 	switch (adev->asic_type) {
1240 	case CHIP_RAVEN:
1241 		gfx_v9_0_init_lbpw(adev);
1242 		break;
1243 	case CHIP_VEGA20:
1244 		gfx_v9_4_init_lbpw(adev);
1245 		break;
1246 	default:
1247 		break;
1248 	}
1249 
1250 	return 0;
1251 }
1252 
1253 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1254 {
1255 	int r;
1256 
1257 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1258 	if (unlikely(r != 0))
1259 		return r;
1260 
1261 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1262 			AMDGPU_GEM_DOMAIN_VRAM);
1263 	if (!r)
1264 		adev->gfx.rlc.clear_state_gpu_addr =
1265 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1266 
1267 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1268 
1269 	return r;
1270 }
1271 
1272 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1273 {
1274 	int r;
1275 
1276 	if (!adev->gfx.rlc.clear_state_obj)
1277 		return;
1278 
1279 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1280 	if (likely(r == 0)) {
1281 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1282 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1283 	}
1284 }
1285 
1286 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1287 {
1288 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1289 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1290 }
1291 
1292 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1293 {
1294 	int r;
1295 	u32 *hpd;
1296 	const __le32 *fw_data;
1297 	unsigned fw_size;
1298 	u32 *fw;
1299 	size_t mec_hpd_size;
1301 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1302 
1303 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1304 
1305 	/* take ownership of the relevant compute queues */
1306 	amdgpu_gfx_compute_queue_acquire(adev);
1307 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1308 
1309 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1310 				      AMDGPU_GEM_DOMAIN_VRAM,
1311 				      &adev->gfx.mec.hpd_eop_obj,
1312 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1313 				      (void **)&hpd);
1314 	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1316 		gfx_v9_0_mec_fini(adev);
1317 		return r;
1318 	}
1319 
1320 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1321 
1322 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1323 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1324 
1325 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1326 
1327 	fw_data = (const __le32 *)
1328 		(adev->gfx.mec_fw->data +
1329 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1330 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1331 
1332 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1333 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1334 				      &adev->gfx.mec.mec_fw_obj,
1335 				      &adev->gfx.mec.mec_fw_gpu_addr,
1336 				      (void **)&fw);
1337 	if (r) {
1338 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1339 		gfx_v9_0_mec_fini(adev);
1340 		return r;
1341 	}
1342 
1343 	memcpy(fw, fw_data, fw_size);
1344 
1345 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1346 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1347 
1348 	return 0;
1349 }
1350 
1351 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1352 {
1353 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1354 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1355 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1356 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1357 		(SQ_IND_INDEX__FORCE_READ_MASK));
1358 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1359 }
1360 
1361 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1362 			   uint32_t wave, uint32_t thread,
1363 			   uint32_t regno, uint32_t num, uint32_t *out)
1364 {
1365 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1366 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1367 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1368 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1369 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1370 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1371 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1372 	while (num--)
1373 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1374 }
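/*
 * SQ_IND_INDEX/SQ_IND_DATA are an indirect access pair into per-wave
 * state: wave_read_ind() does a single FORCE_READ, while wave_read_regs()
 * adds AUTO_INCR so consecutive reads of SQ_IND_DATA stream out num
 * registers starting at regno.
 */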
1375 
1376 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1377 {
1378 	/* type 1 wave data */
1379 	dst[(*no_fields)++] = 1;
1380 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1381 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1382 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1383 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1384 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1385 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1386 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1387 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1388 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1389 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1390 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1391 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1392 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1393 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1394 }
1395 
1396 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1397 				     uint32_t wave, uint32_t start,
1398 				     uint32_t size, uint32_t *dst)
1399 {
1400 	wave_read_regs(
1401 		adev, simd, wave, 0,
1402 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1403 }
1404 
1405 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1406 				     uint32_t wave, uint32_t thread,
1407 				     uint32_t start, uint32_t size,
1408 				     uint32_t *dst)
1409 {
1410 	wave_read_regs(
1411 		adev, simd, wave, thread,
1412 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1413 }
1414 
1415 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1416 				  u32 me, u32 pipe, u32 q, u32 vm)
1417 {
1418 	soc15_grbm_select(adev, me, pipe, q, vm);
1419 }
1420 
1421 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1422 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1423 	.select_se_sh = &gfx_v9_0_select_se_sh,
1424 	.read_wave_data = &gfx_v9_0_read_wave_data,
1425 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1426 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1427 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1428 };
1429 
1430 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1431 {
1432 	u32 gb_addr_config;
1433 	int err;
1434 
1435 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1436 
1437 	switch (adev->asic_type) {
1438 	case CHIP_VEGA10:
1439 		adev->gfx.config.max_hw_contexts = 8;
1440 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1441 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1442 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1443 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1444 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1445 		break;
1446 	case CHIP_VEGA12:
1447 		adev->gfx.config.max_hw_contexts = 8;
1448 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1449 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1450 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1451 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1452 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1453 		DRM_INFO("fix gfx.config for vega12\n");
1454 		break;
1455 	case CHIP_VEGA20:
1456 		adev->gfx.config.max_hw_contexts = 8;
1457 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1458 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1459 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1460 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1461 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1462 		gb_addr_config &= ~0xf3e777ff;
1463 		gb_addr_config |= 0x22014042;
1464 		/* check vbios table if gpu info is not available */
1465 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1466 		if (err)
1467 			return err;
1468 		break;
1469 	case CHIP_RAVEN:
1470 		adev->gfx.config.max_hw_contexts = 8;
1471 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1472 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1473 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1474 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1475 		if (adev->rev_id >= 8)
1476 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1477 		else
1478 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1479 		break;
1480 	case CHIP_ARCTURUS:
1481 		adev->gfx.config.max_hw_contexts = 8;
1482 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1483 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1484 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1485 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1486 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1487 		gb_addr_config &= ~0xf3e777ff;
1488 		gb_addr_config |= 0x22014042;
1489 		break;
1490 	default:
1491 		BUG();
1492 		break;
1493 	}
1494 
1495 	adev->gfx.config.gb_addr_config = gb_addr_config;
1496 
1497 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1498 			REG_GET_FIELD(
1499 					adev->gfx.config.gb_addr_config,
1500 					GB_ADDR_CONFIG,
1501 					NUM_PIPES);
1502 
1503 	adev->gfx.config.max_tile_pipes =
1504 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1505 
1506 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1507 			REG_GET_FIELD(
1508 					adev->gfx.config.gb_addr_config,
1509 					GB_ADDR_CONFIG,
1510 					NUM_BANKS);
1511 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1512 			REG_GET_FIELD(
1513 					adev->gfx.config.gb_addr_config,
1514 					GB_ADDR_CONFIG,
1515 					MAX_COMPRESSED_FRAGS);
1516 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1517 			REG_GET_FIELD(
1518 					adev->gfx.config.gb_addr_config,
1519 					GB_ADDR_CONFIG,
1520 					NUM_RB_PER_SE);
1521 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1522 			REG_GET_FIELD(
1523 					adev->gfx.config.gb_addr_config,
1524 					GB_ADDR_CONFIG,
1525 					NUM_SHADER_ENGINES);
1526 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1527 			REG_GET_FIELD(
1528 					adev->gfx.config.gb_addr_config,
1529 					GB_ADDR_CONFIG,
1530 					PIPE_INTERLEAVE_SIZE));
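	/* all gb_addr_config_fields above are power-of-two expansions of
	 * the packed register fields; e.g. NUM_PIPES = 2 decodes to
	 * 1 << 2 = 4 pipes, and PIPE_INTERLEAVE_SIZE = 0 to 256 bytes */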
1531 
1532 	return 0;
1533 }
1534 
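/*
 * Allocate one NGG buffer in VRAM. size_se is the per-shader-engine
 * size in bytes taken from the corresponding module parameter; zero
 * selects default_size_se, and the total allocation is scaled by the
 * number of shader engines.
 */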
1535 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1536 				   struct amdgpu_ngg_buf *ngg_buf,
1537 				   int size_se,
1538 				   int default_size_se)
1539 {
1540 	int r;
1541 
1542 	if (size_se < 0) {
1543 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1544 		return -EINVAL;
1545 	}
1546 	size_se = size_se ? size_se : default_size_se;
1547 
1548 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1549 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1550 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1551 				    &ngg_buf->bo,
1552 				    &ngg_buf->gpu_addr,
1553 				    NULL);
1554 	if (r) {
1555 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1556 		return r;
1557 	}
1558 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1559 
1560 	return r;
1561 }
1562 
1563 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1564 {
1565 	int i;
1566 
1567 	for (i = 0; i < NGG_BUF_MAX; i++)
1568 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1569 				      &adev->gfx.ngg.buf[i].gpu_addr,
1570 				      NULL);
1571 
1572 	memset(&adev->gfx.ngg.buf[0], 0,
1573 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1574 
1575 	adev->gfx.ngg.init = false;
1576 
1577 	return 0;
1578 }
1579 
1580 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1581 {
1582 	int r;
1583 
1584 	if (!amdgpu_ngg || adev->gfx.ngg.init)
1585 		return 0;
1586 
1587 	/* GDS reserve memory: 64 bytes alignment */
1588 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1589 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1590 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1591 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1592 
1593 	/* Primitive Buffer */
1594 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1595 				    amdgpu_prim_buf_per_se,
1596 				    64 * 1024);
1597 	if (r) {
1598 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1599 		goto err;
1600 	}
1601 
1602 	/* Position Buffer */
1603 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1604 				    amdgpu_pos_buf_per_se,
1605 				    256 * 1024);
1606 	if (r) {
1607 		dev_err(adev->dev, "Failed to create Position Buffer\n");
1608 		goto err;
1609 	}
1610 
1611 	/* Control Sideband */
1612 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1613 				    amdgpu_cntl_sb_buf_per_se,
1614 				    256);
1615 	if (r) {
1616 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1617 		goto err;
1618 	}
1619 
1620 	/* Parameter Cache, not created by default */
1621 	if (amdgpu_param_buf_per_se <= 0)
1622 		goto out;
1623 
1624 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1625 				    amdgpu_param_buf_per_se,
1626 				    512 * 1024);
1627 	if (r) {
1628 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
1629 		goto err;
1630 	}
1631 
1632 out:
1633 	adev->gfx.ngg.init = true;
1634 	return 0;
1635 err:
1636 	gfx_v9_0_ngg_fini(adev);
1637 	return r;
1638 }
1639 
1640 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1641 {
1642 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1643 	int r;
1644 	u32 data, base;
1645 
1646 	if (!amdgpu_ngg)
1647 		return 0;
1648 
1649 	/* Program buffer size */
1650 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1651 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1652 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1653 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
1654 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1655 
1656 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1657 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1658 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1659 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1660 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1661 
1662 	/* Program buffer base address */
1663 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1664 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1665 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1666 
1667 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1668 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1669 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1670 
1671 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1672 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1673 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1674 
1675 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1676 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1677 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1678 
1679 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1680 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1681 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1682 
1683 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1684 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1685 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1686 
1687 	/* Clear GDS reserved memory */
1688 	r = amdgpu_ring_alloc(ring, 17);
1689 	if (r) {
1690 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1691 			  ring->name, r);
1692 		return r;
1693 	}
1694 
1695 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1696 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1697 			           (adev->gds.gds_size +
1698 				    adev->gfx.ngg.gds_reserve_size));
1699 
1700 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1701 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1702 				PACKET3_DMA_DATA_DST_SEL(1) |
1703 				PACKET3_DMA_DATA_SRC_SEL(2)));
1704 	amdgpu_ring_write(ring, 0);
1705 	amdgpu_ring_write(ring, 0);
1706 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1707 	amdgpu_ring_write(ring, 0);
1708 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1709 				adev->gfx.ngg.gds_reserve_size);
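	/* DMA_DATA with DST_SEL(1) targets GDS and SRC_SEL(2) sources the
	 * packet's inline data dword (zero here), clearing the reserved
	 * window; RAW_WAIT orders the DMA against the register writes */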
1710 
1711 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1712 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1713 
1714 	amdgpu_ring_commit(ring);
1715 
1716 	return 0;
1717 }
1718 
1719 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1720 				      int mec, int pipe, int queue)
1721 {
1722 	int r;
1723 	unsigned irq_type;
1724 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1725 
1728 	/* mec0 is me1 */
1729 	ring->me = mec + 1;
1730 	ring->pipe = pipe;
1731 	ring->queue = queue;
1732 
1733 	ring->ring_obj = NULL;
1734 	ring->use_doorbell = true;
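	/* SOC15 doorbell indices are assigned in 64-bit units; shift left
	 * by one to convert to the dword-based index programmed into the
	 * doorbell registers */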
1735 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1736 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1737 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1738 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1739 
1740 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1741 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1742 		+ ring->pipe;
1743 
1744 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1745 	r = amdgpu_ring_init(adev, ring, 1024,
1746 			     &adev->gfx.eop_irq, irq_type);
1747 	if (r)
1748 		return r;
1749 
1751 	return 0;
1752 }
1753 
1754 static int gfx_v9_0_sw_init(void *handle)
1755 {
1756 	int i, j, k, r, ring_id;
1757 	struct amdgpu_ring *ring;
1758 	struct amdgpu_kiq *kiq;
1759 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1760 
1761 	switch (adev->asic_type) {
1762 	case CHIP_VEGA10:
1763 	case CHIP_VEGA12:
1764 	case CHIP_VEGA20:
1765 	case CHIP_RAVEN:
1766 	case CHIP_ARCTURUS:
1767 		adev->gfx.mec.num_mec = 2;
1768 		break;
1769 	default:
1770 		adev->gfx.mec.num_mec = 1;
1771 		break;
1772 	}
1773 
1774 	adev->gfx.mec.num_pipe_per_mec = 4;
1775 	adev->gfx.mec.num_queue_per_pipe = 8;
1776 
1777 	/* EOP Event */
1778 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1779 	if (r)
1780 		return r;
1781 
1782 	/* Privileged reg */
1783 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1784 			      &adev->gfx.priv_reg_irq);
1785 	if (r)
1786 		return r;
1787 
1788 	/* Privileged inst */
1789 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1790 			      &adev->gfx.priv_inst_irq);
1791 	if (r)
1792 		return r;
1793 
1794 	/* ECC error */
1795 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1796 			      &adev->gfx.cp_ecc_error_irq);
1797 	if (r)
1798 		return r;
1799 
1800 	/* FUE error */
1801 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1802 			      &adev->gfx.cp_ecc_error_irq);
1803 	if (r)
1804 		return r;
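	/* note: the FUE source reuses cp_ecc_error_irq rather than a
	 * dedicated irq source */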
1805 
1806 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1807 
1808 	gfx_v9_0_scratch_init(adev);
1809 
1810 	r = gfx_v9_0_init_microcode(adev);
1811 	if (r) {
1812 		DRM_ERROR("Failed to load gfx firmware!\n");
1813 		return r;
1814 	}
1815 
1816 	r = adev->gfx.rlc.funcs->init(adev);
1817 	if (r) {
1818 		DRM_ERROR("Failed to init rlc BOs!\n");
1819 		return r;
1820 	}
1821 
1822 	r = gfx_v9_0_mec_init(adev);
1823 	if (r) {
1824 		DRM_ERROR("Failed to init MEC BOs!\n");
1825 		return r;
1826 	}
1827 
1828 	/* set up the gfx ring */
1829 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1830 		ring = &adev->gfx.gfx_ring[i];
1831 		ring->ring_obj = NULL;
1832 		if (!i)
1833 			sprintf(ring->name, "gfx");
1834 		else
1835 			sprintf(ring->name, "gfx_%d", i);
1836 		ring->use_doorbell = true;
1837 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1838 		r = amdgpu_ring_init(adev, ring, 1024,
1839 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1840 		if (r)
1841 			return r;
1842 	}
1843 
1844 	/* set up the compute queues - allocate horizontally across pipes */
1845 	ring_id = 0;
1846 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1847 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1848 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1849 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1850 					continue;
1851 
1852 				r = gfx_v9_0_compute_ring_init(adev,
1853 							       ring_id,
1854 							       i, k, j);
1855 				if (r)
1856 					return r;
1857 
1858 				ring_id++;
1859 			}
1860 		}
1861 	}
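	/* with the pipe loop innermost, consecutive ring_ids land on
	 * different pipes first, spreading queues across the hardware
	 * pipes before doubling up within a pipe */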
1862 
1863 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1864 	if (r) {
1865 		DRM_ERROR("Failed to init KIQ BOs!\n");
1866 		return r;
1867 	}
1868 
1869 	kiq = &adev->gfx.kiq;
1870 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1871 	if (r)
1872 		return r;
1873 
1874 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
1875 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1876 	if (r)
1877 		return r;
1878 
1879 	adev->gfx.ce_ram_size = 0x8000;
1880 
1881 	r = gfx_v9_0_gpu_early_init(adev);
1882 	if (r)
1883 		return r;
1884 
1885 	r = gfx_v9_0_ngg_init(adev);
1886 	if (r)
1887 		return r;
1888 
1889 	return 0;
1890 }
1891 
1893 static int gfx_v9_0_sw_fini(void *handle)
1894 {
1895 	int i;
1896 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1897 
1898 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1899 			adev->gfx.ras_if) {
1900 		struct ras_common_if *ras_if = adev->gfx.ras_if;
1901 		struct ras_ih_if ih_info = {
1902 			.head = *ras_if,
1903 		};
1904 
1905 		amdgpu_ras_debugfs_remove(adev, ras_if);
1906 		amdgpu_ras_sysfs_remove(adev, ras_if);
1907 		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1908 		amdgpu_ras_feature_enable(adev, ras_if, 0);
1909 		kfree(ras_if);
1910 	}
1911 
1912 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1913 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1914 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1915 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1916 
1917 	amdgpu_gfx_mqd_sw_fini(adev);
1918 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1919 	amdgpu_gfx_kiq_fini(adev);
1920 
1921 	gfx_v9_0_mec_fini(adev);
1922 	gfx_v9_0_ngg_fini(adev);
1923 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1924 	if (adev->asic_type == CHIP_RAVEN) {
1925 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1926 				&adev->gfx.rlc.cp_table_gpu_addr,
1927 				(void **)&adev->gfx.rlc.cp_table_ptr);
1928 	}
1929 	gfx_v9_0_free_microcode(adev);
1930 
1931 	return 0;
1932 }
1933 
1935 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1936 {
1937 	/* TODO */
1938 }
1939 
1940 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1941 {
1942 	u32 data;
1943 
1944 	if (instance == 0xffffffff)
1945 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1946 	else
1947 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1948 
1949 	if (se_num == 0xffffffff)
1950 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1951 	else
1952 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1953 
1954 	if (sh_num == 0xffffffff)
1955 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1956 	else
1957 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1958 
1959 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1960 }
1961 
1962 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1963 {
1964 	u32 data, mask;
1965 
1966 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1967 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1968 
1969 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1970 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1971 
1972 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1973 					 adev->gfx.config.max_sh_per_se);
1974 
1975 	return (~data) & mask;
1976 }
1977 
1978 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1979 {
1980 	int i, j;
1981 	u32 data;
1982 	u32 active_rbs = 0;
1983 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1984 					adev->gfx.config.max_sh_per_se;
1985 
1986 	mutex_lock(&adev->grbm_idx_mutex);
1987 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1988 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1989 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1990 			data = gfx_v9_0_get_rb_active_bitmap(adev);
1991 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1992 					       rb_bitmap_width_per_sh);
1993 		}
1994 	}
1995 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1996 	mutex_unlock(&adev->grbm_idx_mutex);
1997 
1998 	adev->gfx.config.backend_enable_mask = active_rbs;
1999 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2000 }
2001 
2002 #define DEFAULT_SH_MEM_BASES	(0x6000)
2003 #define FIRST_COMPUTE_VMID	(8)
2004 #define LAST_COMPUTE_VMID	(16)
2005 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2006 {
2007 	int i;
2008 	uint32_t sh_mem_config;
2009 	uint32_t sh_mem_bases;
2010 
2011 	/*
2012 	 * Configure apertures:
2013 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2014 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2015 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2016 	 */
2017 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
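	/* PRIVATE_BASE occupies bits 15:0 and SHARED_BASE bits 31:16 of
	 * SH_MEM_BASES; each value supplies bits 63:48 of its aperture,
	 * so 0x6000 places both apertures at 0x60000000'00000000 as
	 * listed above */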
2018 
2019 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2020 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2021 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2022 
2023 	mutex_lock(&adev->srbm_mutex);
2024 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2025 		soc15_grbm_select(adev, 0, 0, 0, i);
2026 		/* CP and shaders */
2027 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2028 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2029 	}
2030 	soc15_grbm_select(adev, 0, 0, 0, 0);
2031 	mutex_unlock(&adev->srbm_mutex);
2032 
2033 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2034 	   access. These should be enabled by FW for target VMIDs. */
2035 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2036 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2037 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2038 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2039 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2040 	}
2041 }
2042 
2043 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2044 {
2045 	u32 tmp;
2046 	int i;
2047 
2048 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2049 
2050 	gfx_v9_0_tiling_mode_table_init(adev);
2051 
2052 	gfx_v9_0_setup_rb(adev);
2053 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2054 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2055 
2056 	/* XXX SH_MEM regs */
2057 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2058 	mutex_lock(&adev->srbm_mutex);
2059 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2060 		soc15_grbm_select(adev, 0, 0, 0, i);
2061 		/* CP and shaders */
2062 		if (i == 0) {
2063 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2064 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2065 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2066 					    !!amdgpu_noretry);
2067 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2068 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2069 		} else {
2070 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2071 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2072 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2073 					    !!amdgpu_noretry);
2074 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2075 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2076 				(adev->gmc.private_aperture_start >> 48));
2077 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2078 				(adev->gmc.shared_aperture_start >> 48));
2079 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2080 		}
2081 	}
2082 	soc15_grbm_select(adev, 0, 0, 0, 0);
2083 
2084 	mutex_unlock(&adev->srbm_mutex);
2085 
2086 	gfx_v9_0_init_compute_vmid(adev);
2087 }
2088 
2089 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2090 {
2091 	u32 i, j, k;
2092 	u32 mask;
2093 
2094 	mutex_lock(&adev->grbm_idx_mutex);
2095 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2096 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2097 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2098 			for (k = 0; k < adev->usec_timeout; k++) {
2099 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2100 					break;
2101 				udelay(1);
2102 			}
2103 			if (k == adev->usec_timeout) {
2104 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2105 						      0xffffffff, 0xffffffff);
2106 				mutex_unlock(&adev->grbm_idx_mutex);
2107 				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2108 					 i, j);
2109 				return;
2110 			}
2111 		}
2112 	}
2113 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2114 	mutex_unlock(&adev->grbm_idx_mutex);
2115 
2116 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2117 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2118 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2119 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2120 	for (k = 0; k < adev->usec_timeout; k++) {
2121 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2122 			break;
2123 		udelay(1);
2124 	}
2125 }
2126 
2127 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2128 					       bool enable)
2129 {
2130 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2131 
2132 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2133 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2134 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2135 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2136 
2137 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2138 }
2139 
2140 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2141 {
2142 	/* csib */
2143 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2144 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2145 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2146 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2147 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2148 			adev->gfx.rlc.clear_state_size);
2149 }
2150 
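/*
 * The RLC register list format interleaves direct register writes with
 * indirect blocks. Each indirect block is a run of (register, value)
 * pairs terminated by 0xFFFFFFFF; this parser records where each block
 * starts and collects the set of unique indirect registers so they can
 * later be mapped to SRM index slots.
 */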
2151 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2152 				int indirect_offset,
2153 				int list_size,
2154 				int *unique_indirect_regs,
2155 				int unique_indirect_reg_count,
2156 				int *indirect_start_offsets,
2157 				int *indirect_start_offsets_count,
2158 				int max_start_offsets_count)
2159 {
2160 	int idx;
2161 
2162 	for (; indirect_offset < list_size; indirect_offset++) {
2163 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2164 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2165 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2166 
2167 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2168 			indirect_offset += 2;
2169 
2170 			/* look for the matching index */
2171 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2172 				if (unique_indirect_regs[idx] ==
2173 					register_list_format[indirect_offset] ||
2174 					!unique_indirect_regs[idx])
2175 					break;
2176 			}
2177 
2178 			BUG_ON(idx >= unique_indirect_reg_count);
2179 
2180 			if (!unique_indirect_regs[idx])
2181 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2182 
2183 			indirect_offset++;
2184 		}
2185 	}
2186 }
2187 
2188 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2189 {
2190 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2191 	int unique_indirect_reg_count = 0;
2192 
2193 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2194 	int indirect_start_offsets_count = 0;
2195 
2196 	int list_size = 0;
2197 	int i = 0, j = 0;
2198 	u32 tmp = 0;
2199 
2200 	u32 *register_list_format =
2201 		kmemdup(adev->gfx.rlc.register_list_format,
2202 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2203 	if (!register_list_format)
2204 		return -ENOMEM;
2205 
2206 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2207 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2208 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2209 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2210 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2211 				    unique_indirect_regs,
2212 				    unique_indirect_reg_count,
2213 				    indirect_start_offsets,
2214 				    &indirect_start_offsets_count,
2215 				    ARRAY_SIZE(indirect_start_offsets));
2216 
2217 	/* enable auto inc in case it is disabled */
2218 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2219 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2220 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2221 
2222 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2223 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2224 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2225 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2226 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2227 			adev->gfx.rlc.register_restore[i]);
2228 
2229 	/* load indirect register */
2230 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2231 		adev->gfx.rlc.reg_list_format_start);
2232 
2233 	/* direct register portion */
2234 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2235 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2236 			register_list_format[i]);
2237 
2238 	/* indirect register portion */
2239 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2240 		if (register_list_format[i] == 0xFFFFFFFF) {
2241 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2242 			continue;
2243 		}
2244 
2245 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2246 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2247 
2248 		for (j = 0; j < unique_indirect_reg_count; j++) {
2249 			if (register_list_format[i] == unique_indirect_regs[j]) {
2250 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2251 				break;
2252 			}
2253 		}
2254 
2255 		BUG_ON(j >= unique_indirect_reg_count);
2256 
2257 		i++;
2258 	}
2259 
2260 	/* set save/restore list size */
2261 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2262 	list_size = list_size >> 1;
2263 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2264 		adev->gfx.rlc.reg_restore_list_size);
2265 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2266 
2267 	/* write the starting offsets to RLC scratch ram */
2268 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2269 		adev->gfx.rlc.starting_offsets_start);
2270 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2271 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2272 		       indirect_start_offsets[i]);
2273 
2274 	/* load unique indirect regs */
2275 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2276 		if (unique_indirect_regs[i] != 0) {
2277 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2278 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2279 			       unique_indirect_regs[i] & 0x3FFFF);
2280 
2281 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2282 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2283 			       unique_indirect_regs[i] >> 20);
2284 		}
2285 	}
2286 
2287 	kfree(register_list_format);
2288 	return 0;
2289 }
2290 
2291 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2292 {
2293 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2294 }
2295 
2296 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2297 					     bool enable)
2298 {
2299 	uint32_t data = 0;
2300 	uint32_t default_data = 0;
2301 
2302 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2303 	if (enable) {
2304 		/* enable GFXIP control over CGPG */
2305 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2306 		if (default_data != data)
2307 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2308 
2309 		/* update status */
2310 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2311 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2312 		if (default_data != data)
2313 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2314 	} else {
2315 		/* restore GFXIP control over CGPG */
2316 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2317 		if (default_data != data)
2318 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2319 	}
2320 }
2321 
2322 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2323 {
2324 	uint32_t data = 0;
2325 
2326 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2327 			      AMD_PG_SUPPORT_GFX_SMG |
2328 			      AMD_PG_SUPPORT_GFX_DMG)) {
2329 		/* init IDLE_POLL_COUNT = 60 */
2330 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2331 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2332 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2333 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2334 
2335 		/* init RLC PG Delay */
2336 		data = 0;
2337 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2338 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2339 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2340 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2341 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2342 
2343 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2344 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2345 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2346 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2347 
2348 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2349 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2350 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2351 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2352 
2353 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2354 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2355 
2356 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2357 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2358 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2359 
2360 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2361 	}
2362 }
2363 
2364 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2365 						bool enable)
2366 {
2367 	uint32_t data = 0;
2368 	uint32_t default_data = 0;
2369 
2370 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2371 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2372 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2373 			     enable ? 1 : 0);
2374 	if (default_data != data)
2375 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2376 }
2377 
2378 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2379 						bool enable)
2380 {
2381 	uint32_t data = 0;
2382 	uint32_t default_data = 0;
2383 
2384 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2385 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2386 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2387 			     enable ? 1 : 0);
2388 	if (default_data != data)
2389 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2390 }
2391 
2392 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2393 					bool enable)
2394 {
2395 	uint32_t data = 0;
2396 	uint32_t default_data = 0;
2397 
2398 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2399 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2400 			     CP_PG_DISABLE,
2401 			     enable ? 0 : 1);
2402 	if (default_data != data)
2403 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2404 }
2405 
2406 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2407 						bool enable)
2408 {
2409 	uint32_t data, default_data;
2410 
2411 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2412 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2413 			     GFX_POWER_GATING_ENABLE,
2414 			     enable ? 1 : 0);
2415 	if (default_data != data)
2416 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2417 }
2418 
2419 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2420 						bool enable)
2421 {
2422 	uint32_t data, default_data;
2423 
2424 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2425 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2426 			     GFX_PIPELINE_PG_ENABLE,
2427 			     enable ? 1 : 0);
2428 	if (default_data != data)
2429 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2430 
2431 	if (!enable)
2432 		/* read any GFX register to wake up GFX */
2433 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2434 }
2435 
2436 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2437 						       bool enable)
2438 {
2439 	uint32_t data, default_data;
2440 
2441 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2442 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2443 			     STATIC_PER_CU_PG_ENABLE,
2444 			     enable ? 1 : 0);
2445 	if (default_data != data)
2446 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2447 }
2448 
2449 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2450 						bool enable)
2451 {
2452 	uint32_t data, default_data;
2453 
2454 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2455 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2456 			     DYN_PER_CU_PG_ENABLE,
2457 			     enable ? 1 : 0);
2458 	if (default_data != data)
2459 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2460 }
2461 
2462 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2463 {
2464 	gfx_v9_0_init_csb(adev);
2465 
2466 	/*
2467 	 * The RLC save/restore list is supported from RLC v2_1 onward
2468 	 * and is required by the gfxoff feature.
2469 	 */
2470 	if (adev->gfx.rlc.is_rlc_v2_1) {
2471 		gfx_v9_1_init_rlc_save_restore_list(adev);
2472 		gfx_v9_0_enable_save_restore_machine(adev);
2473 	}
2474 
2475 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2476 			      AMD_PG_SUPPORT_GFX_SMG |
2477 			      AMD_PG_SUPPORT_GFX_DMG |
2478 			      AMD_PG_SUPPORT_CP |
2479 			      AMD_PG_SUPPORT_GDS |
2480 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2481 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2482 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2483 		gfx_v9_0_init_gfx_power_gating(adev);
2484 	}
2485 }
2486 
2487 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2488 {
2489 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2490 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2491 	gfx_v9_0_wait_for_rlc_serdes(adev);
2492 }
2493 
2494 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2495 {
2496 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2497 	udelay(50);
2498 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2499 	udelay(50);
2500 }
2501 
2502 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2503 {
2504 #ifdef AMDGPU_RLC_DEBUG_RETRY
2505 	u32 rlc_ucode_ver;
2506 #endif
2507 
2508 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2509 	udelay(50);
2510 
2511 	/* for APUs (e.g. Carrizo), the CP interrupt is enabled only after the CP is initialized */
2512 	if (!(adev->flags & AMD_IS_APU)) {
2513 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2514 		udelay(50);
2515 	}
2516 
2517 #ifdef AMDGPU_RLC_DEBUG_RETRY
2518 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2519 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2520 	if (rlc_ucode_ver == 0x108) {
2521 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2522 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2523 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2524 		 * default is 0x9C4 to create a 100us interval */
2525 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2526 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2527 		 * to disable the page fault retry interrupts, default is
2528 		 * 0x100 (256) */
2529 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2530 	}
2531 #endif
2532 }
2533 
2534 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2535 {
2536 	const struct rlc_firmware_header_v2_0 *hdr;
2537 	const __le32 *fw_data;
2538 	unsigned i, fw_size;
2539 
2540 	if (!adev->gfx.rlc_fw)
2541 		return -EINVAL;
2542 
2543 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2544 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2545 
2546 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2547 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2548 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2549 
2550 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2551 			RLCG_UCODE_LOADING_START_ADDRESS);
2552 	for (i = 0; i < fw_size; i++)
2553 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2554 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
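	/* the firmware version is written back to UCODE_ADDR after the
	 * load, matching the convention used by the CP ucode loaders
	 * below */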
2555 
2556 	return 0;
2557 }
2558 
2559 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2560 {
2561 	int r;
2562 
2563 	if (amdgpu_sriov_vf(adev)) {
2564 		gfx_v9_0_init_csb(adev);
2565 		return 0;
2566 	}
2567 
2568 	adev->gfx.rlc.funcs->stop(adev);
2569 
2570 	/* disable CG */
2571 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2572 
2573 	gfx_v9_0_init_pg(adev);
2574 
2575 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2576 		/* legacy rlc firmware loading */
2577 		r = gfx_v9_0_rlc_load_microcode(adev);
2578 		if (r)
2579 			return r;
2580 	}
2581 
2582 	switch (adev->asic_type) {
2583 	case CHIP_RAVEN:
2584 		if (amdgpu_lbpw == 0)
2585 			gfx_v9_0_enable_lbpw(adev, false);
2586 		else
2587 			gfx_v9_0_enable_lbpw(adev, true);
2588 		break;
2589 	case CHIP_VEGA20:
2590 		if (amdgpu_lbpw > 0)
2591 			gfx_v9_0_enable_lbpw(adev, true);
2592 		else
2593 			gfx_v9_0_enable_lbpw(adev, false);
2594 		break;
2595 	default:
2596 		break;
2597 	}
2598 
2599 	adev->gfx.rlc.funcs->start(adev);
2600 
2601 	return 0;
2602 }
2603 
2604 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2605 {
2606 	int i;
2607 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2608 
2609 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2610 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2611 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2612 	if (!enable) {
2613 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2614 			adev->gfx.gfx_ring[i].sched.ready = false;
2615 	}
2616 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2617 	udelay(50);
2618 }
2619 
2620 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2621 {
2622 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2623 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2624 	const struct gfx_firmware_header_v1_0 *me_hdr;
2625 	const __le32 *fw_data;
2626 	unsigned i, fw_size;
2627 
2628 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2629 		return -EINVAL;
2630 
2631 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2632 		adev->gfx.pfp_fw->data;
2633 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2634 		adev->gfx.ce_fw->data;
2635 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2636 		adev->gfx.me_fw->data;
2637 
2638 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2639 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2640 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2641 
2642 	gfx_v9_0_cp_gfx_enable(adev, false);
2643 
2644 	/* PFP */
2645 	fw_data = (const __le32 *)
2646 		(adev->gfx.pfp_fw->data +
2647 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2648 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2649 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2650 	for (i = 0; i < fw_size; i++)
2651 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2652 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2653 
2654 	/* CE */
2655 	fw_data = (const __le32 *)
2656 		(adev->gfx.ce_fw->data +
2657 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2658 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2659 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2660 	for (i = 0; i < fw_size; i++)
2661 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2662 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2663 
2664 	/* ME */
2665 	fw_data = (const __le32 *)
2666 		(adev->gfx.me_fw->data +
2667 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2668 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2669 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2670 	for (i = 0; i < fw_size; i++)
2671 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2672 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2673 
2674 	return 0;
2675 }
2676 
2677 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2678 {
2679 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2680 	const struct cs_section_def *sect = NULL;
2681 	const struct cs_extent_def *ext = NULL;
2682 	int r, i, tmp;
2683 
2684 	/* init the CP */
2685 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2686 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2687 
2688 	gfx_v9_0_cp_gfx_enable(adev, true);
2689 
2690 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2691 	if (r) {
2692 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2693 		return r;
2694 	}
2695 
2696 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2697 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2698 
2699 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2700 	amdgpu_ring_write(ring, 0x80000000);
2701 	amdgpu_ring_write(ring, 0x80000000);
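	/* both CONTEXT_CONTROL payload dwords carry only bit 31 set, the
	 * update-enable bit of the load/shadow control words */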
2702 
2703 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2704 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2705 			if (sect->id == SECT_CONTEXT) {
2706 				amdgpu_ring_write(ring,
2707 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2708 					       ext->reg_count));
2709 				amdgpu_ring_write(ring,
2710 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2711 				for (i = 0; i < ext->reg_count; i++)
2712 					amdgpu_ring_write(ring, ext->extent[i]);
2713 			}
2714 		}
2715 	}
2716 
2717 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2718 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2719 
2720 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2721 	amdgpu_ring_write(ring, 0);
2722 
2723 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2724 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2725 	amdgpu_ring_write(ring, 0x8000);
2726 	amdgpu_ring_write(ring, 0x8000);
2727 
2728 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2729 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2730 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2731 	amdgpu_ring_write(ring, tmp);
2732 	amdgpu_ring_write(ring, 0);
2733 
2734 	amdgpu_ring_commit(ring);
2735 
2736 	return 0;
2737 }
2738 
2739 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2740 {
2741 	struct amdgpu_ring *ring;
2742 	u32 tmp;
2743 	u32 rb_bufsz;
2744 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2745 
2746 	/* Set the write pointer delay */
2747 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2748 
2749 	/* set the RB to use vmid 0 */
2750 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2751 
2752 	/* Set ring buffer size */
2753 	ring = &adev->gfx.gfx_ring[0];
2754 	rb_bufsz = order_base_2(ring->ring_size / 8);
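	/* RB_BUFSZ encodes the ring size as 2^(RB_BUFSZ + 1) dwords,
	 * i.e. log2 of the size in bytes divided by 8 */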
2755 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2756 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2757 #ifdef __BIG_ENDIAN
2758 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2759 #endif
2760 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2761 
2762 	/* Initialize the ring buffer's write pointers */
2763 	ring->wptr = 0;
2764 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2765 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2766 
2767 	/* set the wb address whether it's enabled or not */
2768 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2769 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2770 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2771 
2772 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2773 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2774 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2775 
2776 	mdelay(1);
2777 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2778 
2779 	rb_addr = ring->gpu_addr >> 8;
2780 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2781 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2782 
2783 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2784 	if (ring->use_doorbell) {
2785 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2786 				    DOORBELL_OFFSET, ring->doorbell_index);
2787 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2788 				    DOORBELL_EN, 1);
2789 	} else {
2790 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2791 	}
2792 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2793 
2794 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2795 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
2796 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2797 
2798 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2799 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2800 
2802 	/* start the ring */
2803 	gfx_v9_0_cp_gfx_start(adev);
2804 	ring->sched.ready = true;
2805 
2806 	return 0;
2807 }
2808 
2809 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2810 {
2811 	int i;
2812 
2813 	if (enable) {
2814 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2815 	} else {
2816 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2817 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2818 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
2819 			adev->gfx.compute_ring[i].sched.ready = false;
2820 		adev->gfx.kiq.ring.sched.ready = false;
2821 	}
2822 	udelay(50);
2823 }
2824 
2825 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2826 {
2827 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2828 	const __le32 *fw_data;
2829 	unsigned i;
2830 	u32 tmp;
2831 
2832 	if (!adev->gfx.mec_fw)
2833 		return -EINVAL;
2834 
2835 	gfx_v9_0_cp_compute_enable(adev, false);
2836 
2837 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2838 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2839 
2840 	fw_data = (const __le32 *)
2841 		(adev->gfx.mec_fw->data +
2842 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2843 	tmp = 0;
2844 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2845 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2846 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2847 
2848 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2849 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2850 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2851 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2852 
2853 	/* MEC1 */
2854 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2855 			 mec_hdr->jt_offset);
2856 	for (i = 0; i < mec_hdr->jt_size; i++)
2857 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2858 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2859 
2860 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2861 			adev->gfx.mec_fw_version);
2862 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2863 
2864 	return 0;
2865 }
2866 
2867 /* KIQ functions */
2868 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2869 {
2870 	uint32_t tmp;
2871 	struct amdgpu_device *adev = ring->adev;
2872 
2873 	/* tell RLC which is KIQ queue */
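	/* low byte of RLC_CP_SCHEDULERS: queue in bits 2:0, pipe in bits
	 * 4:3, me in bits 6:5; the second write sets bit 7 to mark the
	 * entry active */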
2874 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2875 	tmp &= 0xffffff00;
2876 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2877 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2878 	tmp |= 0x80;
2879 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2880 }
2881 
2882 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2883 {
2884 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2885 	uint64_t queue_mask = 0;
2886 	int r, i;
2887 
2888 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2889 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2890 			continue;
2891 
2892 		/* This situation may be hit in the future if a new HW
2893 		 * generation exposes more than 64 queues. If so, the
2894 		 * definition of queue_mask needs updating */
2895 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2896 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2897 			break;
2898 		}
2899 
2900 		queue_mask |= (1ull << i);
2901 	}
2902 
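	/* ring space: SET_RESOURCES is 8 dwords (header + 7 payload) and
	 * each MAP_QUEUES is 7 dwords (header + 6 payload) */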
2903 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2904 	if (r) {
2905 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2906 		return r;
2907 	}
2908 
2909 	/* set resources */
2910 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2911 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2912 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
2913 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
2914 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
2915 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
2916 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
2917 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
2918 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
2919 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2920 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2921 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2922 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2923 
2924 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2926 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2927 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2928 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2929 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2930 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2931 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2932 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2933 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2934 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2935 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2936 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2937 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2938 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2939 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2940 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2941 	}
2942 
2943 	r = amdgpu_ring_test_helper(kiq_ring);
2944 	if (r)
2945 		DRM_ERROR("KCQ enable failed\n");
2946 
2947 	return r;
2948 }
2949 
2950 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2951 {
2952 	struct amdgpu_device *adev = ring->adev;
2953 	struct v9_mqd *mqd = ring->mqd_ptr;
2954 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2955 	uint32_t tmp;
2956 
2957 	mqd->header = 0xC0310800;
2958 	mqd->compute_pipelinestat_enable = 0x00000001;
2959 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2960 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2961 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2962 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2963 	mqd->compute_misc_reserved = 0x00000003;
2964 
2965 	mqd->dynamic_cu_mask_addr_lo =
2966 		lower_32_bits(ring->mqd_gpu_addr
2967 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2968 	mqd->dynamic_cu_mask_addr_hi =
2969 		upper_32_bits(ring->mqd_gpu_addr
2970 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2971 
2972 	eop_base_addr = ring->eop_gpu_addr >> 8;
2973 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2974 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2975 
2976 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2977 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2978 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2979 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2980 
2981 	mqd->cp_hqd_eop_control = tmp;
2982 
2983 	/* enable doorbell? */
2984 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2985 
2986 	if (ring->use_doorbell) {
2987 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2988 				    DOORBELL_OFFSET, ring->doorbell_index);
2989 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2990 				    DOORBELL_EN, 1);
2991 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2992 				    DOORBELL_SOURCE, 0);
2993 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2994 				    DOORBELL_HIT, 0);
2995 	} else {
2996 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2997 					 DOORBELL_EN, 0);
2998 	}
2999 
3000 	mqd->cp_hqd_pq_doorbell_control = tmp;
3001 
3002 	/* disable the queue if it's active */
3003 	ring->wptr = 0;
3004 	mqd->cp_hqd_dequeue_request = 0;
3005 	mqd->cp_hqd_pq_rptr = 0;
3006 	mqd->cp_hqd_pq_wptr_lo = 0;
3007 	mqd->cp_hqd_pq_wptr_hi = 0;
3008 
3009 	/* set the pointer to the MQD */
3010 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3011 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3012 
3013 	/* set MQD vmid to 0 */
3014 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3015 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3016 	mqd->cp_mqd_control = tmp;
3017 
3018 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3019 	hqd_gpu_addr = ring->gpu_addr >> 8;
3020 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3021 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3022 
3023 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3024 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3025 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3026 			    (order_base_2(ring->ring_size / 4) - 1));
3027 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3028 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3029 #ifdef __BIG_ENDIAN
3030 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3031 #endif
3032 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3033 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3034 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3035 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3036 	mqd->cp_hqd_pq_control = tmp;
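	/*
	 * Illustrative sizing, assuming the same 2^(QUEUE_SIZE+1)-dword
	 * encoding as EOP_SIZE: a 64KB ring buffer is 16384 dwords, so
	 * QUEUE_SIZE is programmed as order_base_2(16384) - 1 = 13.
	 */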
3037 
3038 	/* set the wb address whether it's enabled or not */
3039 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3040 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3041 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3042 		upper_32_bits(wb_gpu_addr) & 0xffff;
3043 
3044 	/* only used if CP_PQ_WPTR_POLL_CNTL.EN is set */
3045 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3046 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3047 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3048 
3049 	tmp = 0;
3050 	/* enable the doorbell if requested */
3051 	if (ring->use_doorbell) {
3052 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3053 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3054 				DOORBELL_OFFSET, ring->doorbell_index);
3055 
3056 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3057 					 DOORBELL_EN, 1);
3058 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3059 					 DOORBELL_SOURCE, 0);
3060 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3061 					 DOORBELL_HIT, 0);
3062 	}
3063 
3064 	mqd->cp_hqd_pq_doorbell_control = tmp;
3065 
3066 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3067 	ring->wptr = 0;
3068 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3069 
3070 	/* set the vmid for the queue */
3071 	mqd->cp_hqd_vmid = 0;
3072 
3073 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3074 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3075 	mqd->cp_hqd_persistent_state = tmp;
3076 
3077 	/* set MIN_IB_AVAIL_SIZE */
3078 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3079 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3080 	mqd->cp_hqd_ib_control = tmp;
3081 
3082 	/* activate the queue */
3083 	mqd->cp_hqd_active = 1;
3084 
3085 	return 0;
3086 }
3087 
3088 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3089 {
3090 	struct amdgpu_device *adev = ring->adev;
3091 	struct v9_mqd *mqd = ring->mqd_ptr;
3092 	int j;
3093 
3094 	/* disable wptr polling */
3095 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3096 
3097 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3098 	       mqd->cp_hqd_eop_base_addr_lo);
3099 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3100 	       mqd->cp_hqd_eop_base_addr_hi);
3101 
3102 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3103 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3104 	       mqd->cp_hqd_eop_control);
3105 
3106 	/* enable doorbell? */
3107 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3108 	       mqd->cp_hqd_pq_doorbell_control);
3109 
3110 	/* disable the queue if it's active */
3111 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3112 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3113 		for (j = 0; j < adev->usec_timeout; j++) {
3114 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3115 				break;
3116 			udelay(1);
3117 		}
3118 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3119 		       mqd->cp_hqd_dequeue_request);
3120 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3121 		       mqd->cp_hqd_pq_rptr);
3122 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3123 		       mqd->cp_hqd_pq_wptr_lo);
3124 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3125 		       mqd->cp_hqd_pq_wptr_hi);
3126 	}
3127 
3128 	/* set the pointer to the MQD */
3129 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3130 	       mqd->cp_mqd_base_addr_lo);
3131 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3132 	       mqd->cp_mqd_base_addr_hi);
3133 
3134 	/* set MQD vmid to 0 */
3135 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3136 	       mqd->cp_mqd_control);
3137 
3138 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3139 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3140 	       mqd->cp_hqd_pq_base_lo);
3141 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3142 	       mqd->cp_hqd_pq_base_hi);
3143 
3144 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3145 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3146 	       mqd->cp_hqd_pq_control);
3147 
3148 	/* set the wb address whether it's enabled or not */
3149 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3150 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3151 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3152 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3153 
3154 	/* only used if CP_PQ_WPTR_POLL_CNTL.EN is set */
3155 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3156 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3157 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3158 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3159 
3160 	/* enable the doorbell if requested */
3161 	if (ring->use_doorbell) {
3162 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3163 					(adev->doorbell_index.kiq * 2) << 2);
3164 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3165 					(adev->doorbell_index.userqueue_end * 2) << 2);
3166 	}
3167 
3168 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3169 	       mqd->cp_hqd_pq_doorbell_control);
3170 
3171 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3172 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3173 	       mqd->cp_hqd_pq_wptr_lo);
3174 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3175 	       mqd->cp_hqd_pq_wptr_hi);
3176 
3177 	/* set the vmid for the queue */
3178 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3179 
3180 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3181 	       mqd->cp_hqd_persistent_state);
3182 
3183 	/* activate the queue */
3184 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3185 	       mqd->cp_hqd_active);
3186 
3187 	if (ring->use_doorbell)
3188 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3189 
3190 	return 0;
3191 }
3192 
3193 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3194 {
3195 	struct amdgpu_device *adev = ring->adev;
3196 	int j;
3197 
3198 	/* disable the queue if it's active */
3199 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3200 
3201 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3202 
3203 		for (j = 0; j < adev->usec_timeout; j++) {
3204 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3205 				break;
3206 			udelay(1);
3207 		}
3208 
3209 		if (j == adev->usec_timeout) {
3210 			DRM_DEBUG("KIQ dequeue request failed.\n");
3211 
3212 			/* Manual disable if dequeue request times out */
3213 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3214 		}
3215 
3216 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3217 		      0);
3218 	}
3219 
3220 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3221 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3222 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3223 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3224 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3225 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3226 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3227 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3228 
3229 	return 0;
3230 }
3231 
3232 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3233 {
3234 	struct amdgpu_device *adev = ring->adev;
3235 	struct v9_mqd *mqd = ring->mqd_ptr;
3236 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3237 
3238 	gfx_v9_0_kiq_setting(ring);
3239 
3240 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3241 		/* reset MQD to a clean status */
3242 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3243 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3244 
3245 		/* reset ring buffer */
3246 		ring->wptr = 0;
3247 		amdgpu_ring_clear_ring(ring);
3248 
3249 		mutex_lock(&adev->srbm_mutex);
3250 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3251 		gfx_v9_0_kiq_init_register(ring);
3252 		soc15_grbm_select(adev, 0, 0, 0, 0);
3253 		mutex_unlock(&adev->srbm_mutex);
3254 	} else {
3255 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3256 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3257 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3258 		mutex_lock(&adev->srbm_mutex);
3259 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3260 		gfx_v9_0_mqd_init(ring);
3261 		gfx_v9_0_kiq_init_register(ring);
3262 		soc15_grbm_select(adev, 0, 0, 0, 0);
3263 		mutex_unlock(&adev->srbm_mutex);
3264 
3265 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3266 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3267 	}
3268 
3269 	return 0;
3270 }
3271 
3272 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3273 {
3274 	struct amdgpu_device *adev = ring->adev;
3275 	struct v9_mqd *mqd = ring->mqd_ptr;
3276 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3277 
3278 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3279 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3280 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3281 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3282 		mutex_lock(&adev->srbm_mutex);
3283 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3284 		gfx_v9_0_mqd_init(ring);
3285 		soc15_grbm_select(adev, 0, 0, 0, 0);
3286 		mutex_unlock(&adev->srbm_mutex);
3287 
3288 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3289 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3290 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3291 		/* reset MQD to a clean status */
3292 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3293 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3294 
3295 		/* reset ring buffer */
3296 		ring->wptr = 0;
3297 		amdgpu_ring_clear_ring(ring);
3298 	} else {
3299 		amdgpu_ring_clear_ring(ring);
3300 	}
3301 
3302 	return 0;
3303 }
3304 
3305 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3306 {
3307 	struct amdgpu_ring *ring;
3308 	int r;
3309 
3310 	ring = &adev->gfx.kiq.ring;
3311 
3312 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3313 	if (unlikely(r != 0))
3314 		return r;
3315 
3316 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3317 	if (unlikely(r != 0))
3318 		return r;
3319 
3320 	gfx_v9_0_kiq_init_queue(ring);
3321 	amdgpu_bo_kunmap(ring->mqd_obj);
3322 	ring->mqd_ptr = NULL;
3323 	amdgpu_bo_unreserve(ring->mqd_obj);
3324 	ring->sched.ready = true;
3325 	return 0;
3326 }
3327 
3328 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3329 {
3330 	struct amdgpu_ring *ring = NULL;
3331 	int r = 0, i;
3332 
3333 	gfx_v9_0_cp_compute_enable(adev, true);
3334 
3335 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3336 		ring = &adev->gfx.compute_ring[i];
3337 
3338 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3339 		if (unlikely(r != 0))
3340 			goto done;
3341 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3342 		if (!r) {
3343 			r = gfx_v9_0_kcq_init_queue(ring);
3344 			amdgpu_bo_kunmap(ring->mqd_obj);
3345 			ring->mqd_ptr = NULL;
3346 		}
3347 		amdgpu_bo_unreserve(ring->mqd_obj);
3348 		if (r)
3349 			goto done;
3350 	}
3351 
3352 	r = gfx_v9_0_kiq_kcq_enable(adev);
3353 done:
3354 	return r;
3355 }
3356 
3357 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3358 {
3359 	int r, i;
3360 	struct amdgpu_ring *ring;
3361 
3362 	if (!(adev->flags & AMD_IS_APU))
3363 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3364 
3365 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3366 		if (adev->asic_type != CHIP_ARCTURUS) {
3367 			/* legacy firmware loading */
3368 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3369 			if (r)
3370 				return r;
3371 		}
3372 
3373 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3374 		if (r)
3375 			return r;
3376 	}
3377 
3378 	r = gfx_v9_0_kiq_resume(adev);
3379 	if (r)
3380 		return r;
3381 
3382 	if (adev->asic_type != CHIP_ARCTURUS) {
3383 		r = gfx_v9_0_cp_gfx_resume(adev);
3384 		if (r)
3385 			return r;
3386 	}
3387 
3388 	r = gfx_v9_0_kcq_resume(adev);
3389 	if (r)
3390 		return r;
3391 
3392 	if (adev->asic_type != CHIP_ARCTURUS) {
3393 		ring = &adev->gfx.gfx_ring[0];
3394 		r = amdgpu_ring_test_helper(ring);
3395 		if (r)
3396 			return r;
3397 	}
3398 
3399 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3400 		ring = &adev->gfx.compute_ring[i];
3401 		amdgpu_ring_test_helper(ring);
3402 	}
3403 
3404 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3405 
3406 	return 0;
3407 }
3408 
3409 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3410 {
3411 	if (adev->asic_type != CHIP_ARCTURUS)
3412 		gfx_v9_0_cp_gfx_enable(adev, enable);
3413 	gfx_v9_0_cp_compute_enable(adev, enable);
3414 }
3415 
3416 static int gfx_v9_0_hw_init(void *handle)
3417 {
3418 	int r;
3419 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3420 
3421 	gfx_v9_0_init_golden_registers(adev);
3422 
3423 	gfx_v9_0_constants_init(adev);
3424 
3425 	r = gfx_v9_0_csb_vram_pin(adev);
3426 	if (r)
3427 		return r;
3428 
3429 	r = adev->gfx.rlc.funcs->resume(adev);
3430 	if (r)
3431 		return r;
3432 
3433 	r = gfx_v9_0_cp_resume(adev);
3434 	if (r)
3435 		return r;
3436 
3437 	if (adev->asic_type != CHIP_ARCTURUS) {
3438 		r = gfx_v9_0_ngg_en(adev);
3439 		if (r)
3440 			return r;
3441 	}
3442 
3443 	return r;
3444 }
3445 
3446 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3447 {
3448 	int r, i;
3449 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3450 
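	/* each UNMAP_QUEUES packet emitted below is 6 dwords (header plus
	 * 5 payload dwords), which is what this allocation budgets per ring
	 */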
3451 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3452 	if (r) {
3453 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
3454 
3455 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3456 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3457 
3458 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3459 		amdgpu_ring_write(kiq_ring,
3460 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3461 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3462 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3463 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3464 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3465 		amdgpu_ring_write(kiq_ring, 0);
3466 		amdgpu_ring_write(kiq_ring, 0);
3467 		amdgpu_ring_write(kiq_ring, 0);
3468 	}
3469 	r = amdgpu_ring_test_helper(kiq_ring);
3470 	if (r)
3471 		DRM_ERROR("KCQ disable failed\n");
3472 
3473 	return r;
3474 }
3475 
3476 static int gfx_v9_0_hw_fini(void *handle)
3477 {
3478 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3479 
3480 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3481 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3482 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3483 
3484 	/* disable KCQ to avoid CPC touching memory that is no longer valid */
3485 	gfx_v9_0_kcq_disable(adev);
3486 
3487 	if (amdgpu_sriov_vf(adev)) {
3488 		gfx_v9_0_cp_gfx_enable(adev, false);
3489 		/* polling must be disabled for SR-IOV once the hw is done;
3490 		 * otherwise the CPC engine may keep fetching the WB address,
3491 		 * which is already invalid after sw teardown, and trigger a
3492 		 * DMAR read error on the hypervisor side.
3493 		 */
3494 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3495 		return 0;
3496 	}
3497 
3498 	/* Use the deinitialize sequence from CAIL when unbinding the device
3499 	 * from the driver, otherwise the KIQ hangs when it is bound back.
3500 	 */
3501 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3502 		mutex_lock(&adev->srbm_mutex);
3503 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3504 				adev->gfx.kiq.ring.pipe,
3505 				adev->gfx.kiq.ring.queue, 0);
3506 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3507 		soc15_grbm_select(adev, 0, 0, 0, 0);
3508 		mutex_unlock(&adev->srbm_mutex);
3509 	}
3510 
3511 	gfx_v9_0_cp_enable(adev, false);
3512 	adev->gfx.rlc.funcs->stop(adev);
3513 
3514 	gfx_v9_0_csb_vram_unpin(adev);
3515 
3516 	return 0;
3517 }
3518 
3519 static int gfx_v9_0_suspend(void *handle)
3520 {
3521 	return gfx_v9_0_hw_fini(handle);
3522 }
3523 
3524 static int gfx_v9_0_resume(void *handle)
3525 {
3526 	return gfx_v9_0_hw_init(handle);
3527 }
3528 
3529 static bool gfx_v9_0_is_idle(void *handle)
3530 {
3531 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3532 
3533 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3534 				GRBM_STATUS, GUI_ACTIVE))
3535 		return false;
3536 	else
3537 		return true;
3538 }
3539 
3540 static int gfx_v9_0_wait_for_idle(void *handle)
3541 {
3542 	unsigned i;
3543 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3544 
3545 	for (i = 0; i < adev->usec_timeout; i++) {
3546 		if (gfx_v9_0_is_idle(handle))
3547 			return 0;
3548 		udelay(1);
3549 	}
3550 	return -ETIMEDOUT;
3551 }
3552 
3553 static int gfx_v9_0_soft_reset(void *handle)
3554 {
3555 	u32 grbm_soft_reset = 0;
3556 	u32 tmp;
3557 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3558 
3559 	/* GRBM_STATUS */
3560 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3561 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3562 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3563 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3564 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3565 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3566 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3567 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3568 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3569 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3570 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3571 	}
3572 
3573 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3574 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3575 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3576 	}
3577 
3578 	/* GRBM_STATUS2 */
3579 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3580 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3581 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3582 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3583 
3584 
3585 	if (grbm_soft_reset) {
3586 		/* stop the rlc */
3587 		adev->gfx.rlc.funcs->stop(adev);
3588 
3589 		if (adev->asic_type != CHIP_ARCTURUS)
3590 			/* Disable GFX parsing/prefetching */
3591 			gfx_v9_0_cp_gfx_enable(adev, false);
3592 
3593 		/* Disable MEC parsing/prefetching */
3594 		gfx_v9_0_cp_compute_enable(adev, false);
3595 
3597 		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3598 		tmp |= grbm_soft_reset;
3599 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3600 		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3601 		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3602 
3603 		udelay(50);
3604 
3605 		tmp &= ~grbm_soft_reset;
3606 		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3607 		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3609 
3610 		/* Wait a little for things to settle down */
3611 		udelay(50);
3612 	}
3613 	return 0;
3614 }
3615 
3616 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3617 {
3618 	uint64_t clock;
3619 
3620 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3621 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
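	/* writing the capture bit presumably latches the full 64-bit counter,
	 * keeping the split LSB/MSB reads below consistent
	 */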
3622 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3623 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3624 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3625 	return clock;
3626 }
3627 
3628 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3629 					  uint32_t vmid,
3630 					  uint32_t gds_base, uint32_t gds_size,
3631 					  uint32_t gws_base, uint32_t gws_size,
3632 					  uint32_t oa_base, uint32_t oa_size)
3633 {
3634 	struct amdgpu_device *adev = ring->adev;
3635 
3636 	/* GDS Base */
3637 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3638 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3639 				   gds_base);
3640 
3641 	/* GDS Size */
3642 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3643 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3644 				   gds_size);
3645 
3646 	/* GWS */
3647 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3648 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3649 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3650 
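	/*
	 * OA example (illustrative): oa_base = 0 and oa_size = 4 give
	 * (1 << (4 + 0)) - (1 << 0) = 0xf, a contiguous 4-bit allocation
	 * mask starting at bit 0.
	 */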
3651 	/* OA */
3652 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3653 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3654 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3655 }
3656 
3657 static const u32 vgpr_init_compute_shader[] =
3658 {
3659 	0xb07c0000, 0xbe8000ff,
3660 	0x000000f8, 0xbf110800,
3661 	0x7e000280, 0x7e020280,
3662 	0x7e040280, 0x7e060280,
3663 	0x7e080280, 0x7e0a0280,
3664 	0x7e0c0280, 0x7e0e0280,
3665 	0x80808800, 0xbe803200,
3666 	0xbf84fff5, 0xbf9c0000,
3667 	0xd28c0001, 0x0001007f,
3668 	0xd28d0001, 0x0002027e,
3669 	0x10020288, 0xb8810904,
3670 	0xb7814000, 0xd1196a01,
3671 	0x00000301, 0xbe800087,
3672 	0xbefc00c1, 0xd89c4000,
3673 	0x00020201, 0xd89cc080,
3674 	0x00040401, 0x320202ff,
3675 	0x00000800, 0x80808100,
3676 	0xbf84fff8, 0x7e020280,
3677 	0xbf810000, 0x00000000,
3678 };
3679 
3680 static const u32 sgpr_init_compute_shader[] =
3681 {
3682 	0xb07c0000, 0xbe8000ff,
3683 	0x0000005f, 0xbee50080,
3684 	0xbe812c65, 0xbe822c65,
3685 	0xbe832c65, 0xbe842c65,
3686 	0xbe852c65, 0xb77c0005,
3687 	0x80808500, 0xbf84fff8,
3688 	0xbe800080, 0xbf810000,
3689 };
3690 
3691 static const struct soc15_reg_entry vgpr_init_regs[] = {
3692    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3693    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3694    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3695    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3696    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3697    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3698    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3699    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3700    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3701    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3702 };
3703 
3704 static const struct soc15_reg_entry sgpr_init_regs[] = {
3705    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3706    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3707    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3708    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3709    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3710    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3711    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3712    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3713    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3714    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3715 };
3716 
3717 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3718    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3719    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3720    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3721    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3722    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3723    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3724    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3725    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3726    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3727    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3728    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3729    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3730    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3731    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3732    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3733    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3734    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3735    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3736    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3737    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3738    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3739    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3740    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3741    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3742    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3743    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3744    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3745    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3746    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3747    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3748    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3749    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3750 };
3751 
3752 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3753 {
3754 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3755 	int i, r;
3756 
3757 	r = amdgpu_ring_alloc(ring, 7);
3758 	if (r) {
3759 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3760 			ring->name, r);
3761 		return r;
3762 	}
3763 
3764 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3765 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3766 
3767 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3768 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3769 				PACKET3_DMA_DATA_DST_SEL(1) |
3770 				PACKET3_DMA_DATA_SRC_SEL(2) |
3771 				PACKET3_DMA_DATA_ENGINE(0)));
3772 	amdgpu_ring_write(ring, 0);
3773 	amdgpu_ring_write(ring, 0);
3774 	amdgpu_ring_write(ring, 0);
3775 	amdgpu_ring_write(ring, 0);
3776 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3777 				adev->gds.gds_size);
3778 
3779 	amdgpu_ring_commit(ring);
3780 
3781 	for (i = 0; i < adev->usec_timeout; i++) {
3782 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3783 			break;
3784 		udelay(1);
3785 	}
3786 
3787 	if (i >= adev->usec_timeout)
3788 		r = -ETIMEDOUT;
3789 
3790 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3791 
3792 	return r;
3793 }
3794 
3795 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3796 {
3797 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3798 	struct amdgpu_ib ib;
3799 	struct dma_fence *f = NULL;
3800 	int r, i, j, k;
3801 	unsigned total_size, vgpr_offset, sgpr_offset;
3802 	u64 gpu_addr;
3803 
3804 	/* only supported when RAS is enabled */
3805 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3806 		return 0;
3807 
3808 	/* bail if the compute ring is not ready */
3809 	if (!ring->sched.ready)
3810 		return 0;
3811 
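	/*
	 * Sizing note (derived from the packets emitted below): each
	 * single-register SET_SH_REG write costs 3 dwords (header, offset,
	 * value), the COMPUTE_PGM_LO/HI write costs 4, the dispatch packet
	 * costs 5 and the CS partial flush costs 2, all times 4 bytes per
	 * dword.
	 */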
3812 	total_size =
3813 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3814 	total_size +=
3815 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3816 	total_size = ALIGN(total_size, 256);
3817 	vgpr_offset = total_size;
3818 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3819 	sgpr_offset = total_size;
3820 	total_size += sizeof(sgpr_init_compute_shader);
3821 
3822 	/* allocate an indirect buffer to put the commands in */
3823 	memset(&ib, 0, sizeof(ib));
3824 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3825 	if (r) {
3826 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3827 		return r;
3828 	}
3829 
3830 	/* load the compute shaders */
3831 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3832 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3833 
3834 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3835 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3836 
3837 	/* init the ib length to 0 */
3838 	ib.length_dw = 0;
3839 
3840 	/* VGPR */
3841 	/* write the register state for the compute dispatch */
3842 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3843 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3844 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3845 								- PACKET3_SET_SH_REG_START;
3846 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3847 	}
3848 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3849 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3850 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3851 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3852 							- PACKET3_SET_SH_REG_START;
3853 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3854 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3855 
3856 	/* write dispatch packet */
3857 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3858 	ib.ptr[ib.length_dw++] = 128; /* x */
3859 	ib.ptr[ib.length_dw++] = 1; /* y */
3860 	ib.ptr[ib.length_dw++] = 1; /* z */
3861 	ib.ptr[ib.length_dw++] =
3862 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3863 
3864 	/* write CS partial flush packet */
3865 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3866 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3867 
3868 	/* SGPR */
3869 	/* write the register state for the compute dispatch */
3870 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3871 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3872 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3873 								- PACKET3_SET_SH_REG_START;
3874 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3875 	}
3876 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3877 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3878 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3879 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3880 							- PACKET3_SET_SH_REG_START;
3881 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3882 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3883 
3884 	/* write dispatch packet */
3885 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3886 	ib.ptr[ib.length_dw++] = 128; /* x */
3887 	ib.ptr[ib.length_dw++] = 1; /* y */
3888 	ib.ptr[ib.length_dw++] = 1; /* z */
3889 	ib.ptr[ib.length_dw++] =
3890 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3891 
3892 	/* write CS partial flush packet */
3893 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3894 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3895 
3896 	/* schedule the ib on the ring */
3897 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3898 	if (r) {
3899 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3900 		goto fail;
3901 	}
3902 
3903 	/* wait for the GPU to finish processing the IB */
3904 	r = dma_fence_wait(f, false);
3905 	if (r) {
3906 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3907 		goto fail;
3908 	}
3909 
3910 	/* read back registers to clear the counters */
3911 	mutex_lock(&adev->grbm_idx_mutex);
3912 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3913 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3914 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3915 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3916 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3917 			}
3918 		}
3919 	}
3920 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3921 	mutex_unlock(&adev->grbm_idx_mutex);
3922 
3923 fail:
3924 	amdgpu_ib_free(adev, &ib, NULL);
3925 	dma_fence_put(f);
3926 
3927 	return r;
3928 }
3929 
3930 static int gfx_v9_0_early_init(void *handle)
3931 {
3932 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3933 
3934 	if (adev->asic_type == CHIP_ARCTURUS)
3935 		adev->gfx.num_gfx_rings = 0;
3936 	else
3937 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3938 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3939 	gfx_v9_0_set_ring_funcs(adev);
3940 	gfx_v9_0_set_irq_funcs(adev);
3941 	gfx_v9_0_set_gds_init(adev);
3942 	gfx_v9_0_set_rlc_funcs(adev);
3943 
3944 	return 0;
3945 }
3946 
3947 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3948 		struct amdgpu_iv_entry *entry);
3949 
3950 static int gfx_v9_0_ecc_late_init(void *handle)
3951 {
3952 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3953 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
3954 	struct ras_ih_if ih_info = {
3955 		.cb = gfx_v9_0_process_ras_data_cb,
3956 	};
3957 	struct ras_fs_if fs_info = {
3958 		.sysfs_name = "gfx_err_count",
3959 		.debugfs_name = "gfx_err_inject",
3960 	};
3961 	struct ras_common_if ras_block = {
3962 		.block = AMDGPU_RAS_BLOCK__GFX,
3963 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3964 		.sub_block_index = 0,
3965 		.name = "gfx",
3966 	};
3967 	int r;
3968 
3969 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3970 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3971 		return 0;
3972 	}
3973 
3974 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
3975 	if (r)
3976 		return r;
3977 
3978 	/* requires IBs so do in late init after IB pool is initialized */
3979 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3980 	if (r)
3981 		return r;
3982 
3983 	/* handle resume path. */
3984 	if (*ras_if) {
3985 		/* re-send the RAS TA enable cmd during resume,
3986 		 * and be prepared to handle failure.
3987 		 */
3988 		ih_info.head = **ras_if;
3989 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3990 		if (r) {
3991 			if (r == -EAGAIN) {
3992 				/* request a gpu reset. will run again. */
3993 				amdgpu_ras_request_reset_on_boot(adev,
3994 						AMDGPU_RAS_BLOCK__GFX);
3995 				return 0;
3996 			}
3997 			/* fail to enable ras, cleanup all. */
3998 			goto irq;
3999 		}
4000 		/* enable successfully. continue. */
4001 		goto resume;
4002 	}
4003 
4004 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4005 	if (!*ras_if)
4006 		return -ENOMEM;
4007 
4008 	**ras_if = ras_block;
4009 
4010 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4011 	if (r) {
4012 		if (r == -EAGAIN) {
4013 			amdgpu_ras_request_reset_on_boot(adev,
4014 					AMDGPU_RAS_BLOCK__GFX);
4015 			r = 0;
4016 		}
4017 		goto feature;
4018 	}
4019 
4020 	ih_info.head = **ras_if;
4021 	fs_info.head = **ras_if;
4022 
4023 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4024 	if (r)
4025 		goto interrupt;
4026 
4027 	amdgpu_ras_debugfs_create(adev, &fs_info);
4028 
4029 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
4030 	if (r)
4031 		goto sysfs;
4032 resume:
4033 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4034 	if (r)
4035 		goto irq;
4036 
4037 	return 0;
4038 irq:
4039 	amdgpu_ras_sysfs_remove(adev, *ras_if);
4040 sysfs:
4041 	amdgpu_ras_debugfs_remove(adev, *ras_if);
4042 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4043 interrupt:
4044 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
4045 feature:
4046 	kfree(*ras_if);
4047 	*ras_if = NULL;
4048 	return r;
4049 }
4050 
4051 static int gfx_v9_0_late_init(void *handle)
4052 {
4053 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4054 	int r;
4055 
4056 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4057 	if (r)
4058 		return r;
4059 
4060 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4061 	if (r)
4062 		return r;
4063 
4064 	r = gfx_v9_0_ecc_late_init(handle);
4065 	if (r)
4066 		return r;
4067 
4068 	return 0;
4069 }
4070 
4071 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4072 {
4073 	uint32_t rlc_setting;
4074 
4075 	/* if RLC is not enabled, do nothing */
4076 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4077 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4078 		return false;
4079 
4080 	return true;
4081 }
4082 
4083 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4084 {
4085 	uint32_t data;
4086 	unsigned i;
4087 
4088 	data = RLC_SAFE_MODE__CMD_MASK;
4089 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4090 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4091 
4092 	/* wait for RLC_SAFE_MODE */
4093 	for (i = 0; i < adev->usec_timeout; i++) {
4094 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4095 			break;
4096 		udelay(1);
4097 	}
4098 }
4099 
4100 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4101 {
4102 	uint32_t data;
4103 
4104 	data = RLC_SAFE_MODE__CMD_MASK;
4105 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4106 }
4107 
4108 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4109 						bool enable)
4110 {
4111 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4112 
4113 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4114 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4115 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4116 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4117 	} else {
4118 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4119 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4120 	}
4121 
4122 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4123 }
4124 
4125 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4126 						bool enable)
4127 {
4128 	/* TODO: double check if we need to perform under safe mode */
4129 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4130 
4131 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4132 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4133 	else
4134 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4135 
4136 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4137 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4138 	else
4139 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4140 
4141 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4142 }
4143 
4144 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4145 						      bool enable)
4146 {
4147 	uint32_t data, def;
4148 
4149 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4150 
4151 	/* It is disabled by HW by default */
4152 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4153 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4154 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4155 
4156 		if (adev->asic_type != CHIP_VEGA12)
4157 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4158 
4159 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4160 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4161 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4162 
4163 		/* only for Vega10 & Raven1 */
4164 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4165 
4166 		if (def != data)
4167 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4168 
4169 		/* MGLS is a global flag to control all MGLS in GFX */
4170 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4171 			/* 2 - RLC memory Light sleep */
4172 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4173 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4174 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4175 				if (def != data)
4176 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4177 			}
4178 			/* 3 - CP memory Light sleep */
4179 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4180 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4181 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4182 				if (def != data)
4183 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4184 			}
4185 		}
4186 	} else {
4187 		/* 1 - MGCG_OVERRIDE */
4188 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4189 
4190 		if (adev->asic_type != CHIP_VEGA12)
4191 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4192 
4193 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4194 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4195 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4196 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4197 
4198 		if (def != data)
4199 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4200 
4201 		/* 2 - disable MGLS in RLC */
4202 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4203 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4204 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4205 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4206 		}
4207 
4208 		/* 3 - disable MGLS in CP */
4209 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4210 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4211 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4212 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4213 		}
4214 	}
4215 
4216 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4217 }
4218 
4219 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4220 					   bool enable)
4221 {
4222 	uint32_t data, def;
4223 
4224 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4225 
4226 	/* Enable 3D CGCG/CGLS */
4227 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4228 		/* write cmd to clear cgcg/cgls ov */
4229 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4230 		/* unset CGCG override */
4231 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4232 		/* update CGCG and CGLS override bits */
4233 		if (def != data)
4234 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4235 
4236 		/* enable 3Dcgcg FSM(0x0000363f) */
4237 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4238 
4239 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4240 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4241 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4242 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4243 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4244 		if (def != data)
4245 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4246 
4247 		/* set IDLE_POLL_COUNT(0x00900100) */
4248 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4249 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4250 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4251 		if (def != data)
4252 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4253 	} else {
4254 		/* Disable CGCG/CGLS */
4255 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4256 		/* disable cgcg, cgls should be disabled */
4257 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4258 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4259 		/* disable cgcg and cgls in FSM */
4260 		if (def != data)
4261 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4262 	}
4263 
4264 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4265 }
4266 
4267 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4268 						      bool enable)
4269 {
4270 	uint32_t def, data;
4271 
4272 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4273 
4274 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4275 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4276 		/* unset CGCG override */
4277 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4278 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4279 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4280 		else
4281 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4282 		/* update CGCG and CGLS override bits */
4283 		if (def != data)
4284 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4285 
4286 		/* enable cgcg FSM(0x0000363F) */
4287 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4288 
4289 		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4290 			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4291 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4292 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4293 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4294 		if (def != data)
4295 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4296 
4297 		/* set IDLE_POLL_COUNT(0x00900100) */
4298 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4299 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4300 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4301 		if (def != data)
4302 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4303 	} else {
4304 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4305 		/* reset CGCG/CGLS bits */
4306 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4307 		/* disable cgcg and cgls in FSM */
4308 		if (def != data)
4309 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4310 	}
4311 
4312 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4313 }
4314 
4315 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4316 					    bool enable)
4317 {
4318 	if (enable) {
4319 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4320 		 * ===  MGCG + MGLS ===
4321 		 */
4322 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4323 		/* ===  CGCG /CGLS for GFX 3D Only === */
4324 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4325 		/* ===  CGCG + CGLS === */
4326 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4327 	} else {
4328 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4329 		 * ===  CGCG + CGLS ===
4330 		 */
4331 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4332 		/* ===  CGCG /CGLS for GFX 3D Only === */
4333 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4334 		/* ===  MGCG + MGLS === */
4335 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4336 	}
4337 	return 0;
4338 }
4339 
4340 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4341 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4342 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4343 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4344 	.init = gfx_v9_0_rlc_init,
4345 	.get_csb_size = gfx_v9_0_get_csb_size,
4346 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4347 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4348 	.resume = gfx_v9_0_rlc_resume,
4349 	.stop = gfx_v9_0_rlc_stop,
4350 	.reset = gfx_v9_0_rlc_reset,
4351 	.start = gfx_v9_0_rlc_start
4352 };
4353 
4354 static int gfx_v9_0_set_powergating_state(void *handle,
4355 					  enum amd_powergating_state state)
4356 {
4357 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4358 	bool enable = (state == AMD_PG_STATE_GATE);
4359 
4360 	switch (adev->asic_type) {
4361 	case CHIP_RAVEN:
4362 		if (!enable) {
4363 			amdgpu_gfx_off_ctrl(adev, false);
4364 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4365 		}
4366 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4367 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4368 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4369 		} else {
4370 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4371 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4372 		}
4373 
4374 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4375 			gfx_v9_0_enable_cp_power_gating(adev, true);
4376 		else
4377 			gfx_v9_0_enable_cp_power_gating(adev, false);
4378 
4379 		/* update gfx cgpg state */
4380 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4381 
4382 		/* update mgcg state */
4383 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4384 
4385 		if (enable)
4386 			amdgpu_gfx_off_ctrl(adev, true);
4387 		break;
4388 	case CHIP_VEGA12:
4389 		if (!enable) {
4390 			amdgpu_gfx_off_ctrl(adev, false);
4391 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4392 		} else {
4393 			amdgpu_gfx_off_ctrl(adev, true);
4394 		}
4395 		break;
4396 	default:
4397 		break;
4398 	}
4399 
4400 	return 0;
4401 }
4402 
4403 static int gfx_v9_0_set_clockgating_state(void *handle,
4404 					  enum amd_clockgating_state state)
4405 {
4406 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4407 
4408 	if (amdgpu_sriov_vf(adev))
4409 		return 0;
4410 
4411 	switch (adev->asic_type) {
4412 	case CHIP_VEGA10:
4413 	case CHIP_VEGA12:
4414 	case CHIP_VEGA20:
4415 	case CHIP_RAVEN:
4416 		gfx_v9_0_update_gfx_clock_gating(adev,
4417 						 state == AMD_CG_STATE_GATE);
4418 		break;
4419 	default:
4420 		break;
4421 	}
4422 	return 0;
4423 }
4424 
4425 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4426 {
4427 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4428 	int data;
4429 
4430 	if (amdgpu_sriov_vf(adev))
4431 		*flags = 0;
4432 
4433 	/* AMD_CG_SUPPORT_GFX_MGCG */
4434 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4435 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4436 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4437 
4438 	/* AMD_CG_SUPPORT_GFX_CGCG */
4439 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4440 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4441 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4442 
4443 	/* AMD_CG_SUPPORT_GFX_CGLS */
4444 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4445 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4446 
4447 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4448 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4449 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4450 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4451 
4452 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4453 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4454 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4455 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4456 
4457 	if (adev->asic_type != CHIP_ARCTURUS) {
4458 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4459 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4460 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4461 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4462 
4463 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4464 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4465 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4466 	}
4467 }
4468 
4469 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4470 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4471 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4472 }
4473 
4474 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4475 {
4476 	struct amdgpu_device *adev = ring->adev;
4477 	u64 wptr;
4478 
4479 	/* XXX check if swapping is necessary on BE */
4480 	if (ring->use_doorbell) {
4481 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4482 	} else {
4483 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4484 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4485 	}
4486 
4487 	return wptr;
4488 }
4489 
4490 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4491 {
4492 	struct amdgpu_device *adev = ring->adev;
4493 
4494 	if (ring->use_doorbell) {
4495 		/* XXX check if swapping is necessary on BE */
4496 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4497 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4498 	} else {
4499 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4500 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4501 	}
4502 }
4503 
4504 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4505 {
4506 	struct amdgpu_device *adev = ring->adev;
4507 	u32 ref_and_mask, reg_mem_engine;
4508 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4509 
4510 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4511 		switch (ring->me) {
4512 		case 1:
4513 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4514 			break;
4515 		case 2:
4516 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4517 			break;
4518 		default:
4519 			return;
4520 		}
4521 		reg_mem_engine = 0;
4522 	} else {
4523 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4524 		reg_mem_engine = 1; /* pfp */
4525 	}
4526 
4527 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4528 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4529 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4530 			      ref_and_mask, ref_and_mask, 0x20);
4531 }
4532 
4533 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4534 					struct amdgpu_job *job,
4535 					struct amdgpu_ib *ib,
4536 					uint32_t flags)
4537 {
4538 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4539 	u32 header, control = 0;
4540 
4541 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4542 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4543 	else
4544 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4545 
4546 	control |= ib->length_dw | (vmid << 24);
4547 
4548 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4549 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4550 
4551 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4552 			gfx_v9_0_ring_emit_de_meta(ring);
4553 	}
4554 
4555 	amdgpu_ring_write(ring, header);
4556 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4557 	amdgpu_ring_write(ring,
4558 #ifdef __BIG_ENDIAN
4559 		(2 << 0) |
4560 #endif
4561 		lower_32_bits(ib->gpu_addr));
4562 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4563 	amdgpu_ring_write(ring, control);
4564 }
4565 
4566 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4567 					  struct amdgpu_job *job,
4568 					  struct amdgpu_ib *ib,
4569 					  uint32_t flags)
4570 {
4571 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4572 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4573 
4574 	/* Currently, there is a high possibility to get wave ID mismatch
4575 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4576 	 * different wave IDs than the GDS expects. This situation happens
4577 	 * randomly when at least 5 compute pipes use GDS ordered append.
4578 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4579 	 * Those are probably bugs somewhere else in the kernel driver.
4580 	 *
4581 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4582 	 * GDS to 0 for this ring (me/pipe).
4583 	 */
4584 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4585 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4586 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4587 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4588 	}
4589 
4590 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4591 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4592 	amdgpu_ring_write(ring,
4593 #ifdef __BIG_ENDIAN
4594 				(2 << 0) |
4595 #endif
4596 				lower_32_bits(ib->gpu_addr));
4597 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4598 	amdgpu_ring_write(ring, control);
4599 }
4600 
4601 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4602 				     u64 seq, unsigned flags)
4603 {
4604 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4605 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4606 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4607 
4608 	/* RELEASE_MEM - flush caches, send int */
4609 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4610 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4611 					       EOP_TC_NC_ACTION_EN) :
4612 					      (EOP_TCL1_ACTION_EN |
4613 					       EOP_TC_ACTION_EN |
4614 					       EOP_TC_WB_ACTION_EN |
4615 					       EOP_TC_MD_ACTION_EN)) |
4616 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4617 				 EVENT_INDEX(5)));
4618 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4619 
4620 	/*
4621 	 * the address should be Qword aligned for a 64-bit write, and Dword
4622 	 * aligned if we only send the low 32 bits (the high data is discarded)
4623 	 */
4624 	if (write64bit)
4625 		BUG_ON(addr & 0x7);
4626 	else
4627 		BUG_ON(addr & 0x3);
4628 	amdgpu_ring_write(ring, lower_32_bits(addr));
4629 	amdgpu_ring_write(ring, upper_32_bits(addr));
4630 	amdgpu_ring_write(ring, lower_32_bits(seq));
4631 	amdgpu_ring_write(ring, upper_32_bits(seq));
4632 	amdgpu_ring_write(ring, 0);
4633 }
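
/*
 * Worked example of the DATA_SEL/INT_SEL dword above (semantics
 * hedged, encodings as written by this function): a 64-bit fence with
 * AMDGPU_FENCE_FLAG_INT set emits DATA_SEL(2) | INT_SEL(2), i.e.
 * write the full 64-bit seq and raise an interrupt, while a plain
 * 32-bit fence emits DATA_SEL(1) | INT_SEL(0), i.e. write only the
 * low 32 bits with no interrupt.
 */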
4634 
4635 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4636 {
4637 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4638 	uint32_t seq = ring->fence_drv.sync_seq;
4639 	uint64_t addr = ring->fence_drv.gpu_addr;
4640 
4641 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4642 			      lower_32_bits(addr), upper_32_bits(addr),
4643 			      seq, 0xffffffff, 4);
4644 }
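
/*
 * In effect (a sketch of the wait semantics, argument order per the
 * gfx_v9_0_wait_reg_mem() helper defined earlier in this file): the
 * packet polls the 32-bit fence writeback location at 'addr' until it
 * matches 'seq' under the full 0xffffffff mask, with a poll interval
 * argument of 4, so everything previously submitted on this ring has
 * signalled before later commands run.
 */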
4645 
4646 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4647 					unsigned vmid, uint64_t pd_addr)
4648 {
4649 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4650 
4651 	/* compute doesn't have PFP */
4652 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4653 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4654 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4655 		amdgpu_ring_write(ring, 0x0);
4656 	}
4657 }
4658 
4659 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4660 {
4661 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4662 }
4663 
4664 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4665 {
4666 	u64 wptr;
4667 
4668 	/* XXX check if swapping is necessary on BE */
4669 	if (ring->use_doorbell)
4670 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4671 	else
4672 		BUG();
4673 	return wptr;
4674 }
4675 
4676 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4677 					   bool acquire)
4678 {
4679 	struct amdgpu_device *adev = ring->adev;
4680 	int pipe_num, tmp, reg;
4681 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4682 
4683 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4684 
4685 	/* first me only has 2 entries, GFX and HP3D */
4686 	if (ring->me > 0)
4687 		pipe_num -= 2;
4688 
4689 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4690 	tmp = RREG32(reg);
4691 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4692 	WREG32(reg, tmp);
4693 }
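
/*
 * Worked example of the pipe_num arithmetic (num_pipe_per_mec assumed
 * to be 4 here): a compute ring with me == 1 and pipe == 2 gives
 * 1 * 4 + 2 - 2 == 4, i.e. the fifth SPI_WCL_PIPE_PERCENT register.
 * The "- 2" accounts for the first me contributing only its GFX and
 * HP3D entries to this register block, as noted above.
 */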
4694 
4695 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4696 					    struct amdgpu_ring *ring,
4697 					    bool acquire)
4698 {
4699 	int i, pipe;
4700 	bool reserve;
4701 	struct amdgpu_ring *iring;
4702 
4703 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4704 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4705 	if (acquire)
4706 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4707 	else
4708 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4709 
4710 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4711 		/* Clear all reservations - everyone reacquires all resources */
4712 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4713 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4714 						       true);
4715 
4716 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4717 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4718 						       true);
4719 	} else {
4720 		/* Lower all pipes without a current reservation */
4721 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4722 			iring = &adev->gfx.gfx_ring[i];
4723 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4724 							   iring->me,
4725 							   iring->pipe,
4726 							   0);
4727 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4728 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4729 		}
4730 
4731 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4732 			iring = &adev->gfx.compute_ring[i];
4733 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4734 							   iring->me,
4735 							   iring->pipe,
4736 							   0);
4737 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4738 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4739 		}
4740 	}
4741 
4742 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4743 }
4744 
4745 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4746 				      struct amdgpu_ring *ring,
4747 				      bool acquire)
4748 {
4749 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4750 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4751 
4752 	mutex_lock(&adev->srbm_mutex);
4753 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4754 
4755 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4756 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4757 
4758 	soc15_grbm_select(adev, 0, 0, 0, 0);
4759 	mutex_unlock(&adev->srbm_mutex);
4760 }
4761 
4762 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4763 					       enum drm_sched_priority priority)
4764 {
4765 	struct amdgpu_device *adev = ring->adev;
4766 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4767 
4768 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4769 		return;
4770 
4771 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4772 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4773 }
4774 
4775 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4776 {
4777 	struct amdgpu_device *adev = ring->adev;
4778 
4779 	/* XXX check if swapping is necessary on BE */
4780 	if (ring->use_doorbell) {
4781 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4782 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4783 	} else {
4784 		BUG(); /* only DOORBELL method supported on gfx9 now */
4785 	}
4786 }
4787 
4788 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4789 					 u64 seq, unsigned int flags)
4790 {
4791 	struct amdgpu_device *adev = ring->adev;
4792 
4793 	/* we only allocate 32bit for each seq wb address */
4794 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4795 
4796 	/* write fence seq to the "addr" */
4797 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4798 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4799 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4800 	amdgpu_ring_write(ring, lower_32_bits(addr));
4801 	amdgpu_ring_write(ring, upper_32_bits(addr));
4802 	amdgpu_ring_write(ring, lower_32_bits(seq));
4803 
4804 	if (flags & AMDGPU_FENCE_FLAG_INT) {
4805 		/* set register to trigger INT */
4806 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4807 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4808 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4809 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4810 		amdgpu_ring_write(ring, 0);
4811 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4812 	}
4813 }
4814 
4815 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4816 {
4817 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4818 	amdgpu_ring_write(ring, 0);
4819 }
4820 
4821 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4822 {
4823 	struct v9_ce_ib_state ce_payload = {0};
4824 	uint64_t csa_addr;
4825 	int cnt;
4826 
4827 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4828 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4829 
4830 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4831 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4832 				 WRITE_DATA_DST_SEL(8) |
4833 				 WR_CONFIRM) |
4834 				 WRITE_DATA_CACHE_POLICY(0));
4835 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4836 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4837 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4838 }
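
/*
 * A note on the cnt computation above (PACKET3 count semantics stated
 * as an assumption): the count field covers the dwords following the
 * header minus one. WRITE_DATA carries 3 fixed dwords (control plus
 * the 64-bit destination address) followed by the payload, so N
 * payload dwords need a count of N + 3 - 1 == N + 2, which the code
 * writes as N + 4 - 2.
 */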
4839 
4840 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4841 {
4842 	struct v9_de_ib_state de_payload = {0};
4843 	uint64_t csa_addr, gds_addr;
4844 	int cnt;
4845 
4846 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4847 	gds_addr = csa_addr + 4096;
4848 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4849 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4850 
4851 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4852 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4853 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4854 				 WRITE_DATA_DST_SEL(8) |
4855 				 WR_CONFIRM) |
4856 				 WRITE_DATA_CACHE_POLICY(0));
4857 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4858 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4859 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4860 }
4861 
4862 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4863 {
4864 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4865 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4866 }
4867 
4868 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4869 {
4870 	uint32_t dw2 = 0;
4871 
4872 	if (amdgpu_sriov_vf(ring->adev))
4873 		gfx_v9_0_ring_emit_ce_meta(ring);
4874 
4875 	gfx_v9_0_ring_emit_tmz(ring, true);
4876 
4877 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4878 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4879 		/* set load_global_config & load_global_uconfig */
4880 		dw2 |= 0x8001;
4881 		/* set load_cs_sh_regs */
4882 		dw2 |= 0x01000000;
4883 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
4884 		dw2 |= 0x10002;
4885 
4886 		/* set load_ce_ram if a preamble is presented */
4887 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4888 			dw2 |= 0x10000000;
4889 	} else {
4890 		/* still load_ce_ram if this is the first time the preamble is
4891 		 * presented, even though no context switch happens.
4892 		 */
4893 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4894 			dw2 |= 0x10000000;
4895 	}
4896 
4897 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4898 	amdgpu_ring_write(ring, dw2);
4899 	amdgpu_ring_write(ring, 0);
4900 }
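
/*
 * Worked example (pure bit arithmetic on the constants above): with
 * AMDGPU_HAVE_CTX_SWITCH and AMDGPU_PREAMBLE_IB_PRESENT both set,
 * dw2 == 0x80000000 | 0x8001 | 0x01000000 | 0x10002 | 0x10000000
 *     == 0x91018003,
 * i.e. load_enable plus all of the load_* selects and load_ce_ram.
 */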
4901 
4902 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4903 {
4904 	unsigned ret;
4905 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4906 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4907 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4908 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
4909 	ret = ring->wptr & ring->buf_mask;
4910 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4911 	return ret;
4912 }
4913 
4914 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4915 {
4916 	unsigned cur;
4917 	BUG_ON(offset > ring->buf_mask);
4918 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
4919 
4920 	cur = (ring->wptr & ring->buf_mask) - 1;
4921 	if (likely(cur > offset))
4922 		ring->ring[offset] = cur - offset;
4923 	else
4924 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4925 }
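
/*
 * Worked example of the patch arithmetic (illustrative offsets): if
 * the 0x55aa55aa placeholder was written at offset 10 and the ring
 * has advanced to cur == 50, the placeholder becomes 40, telling
 * COND_EXEC to skip 40 dwords. If the write pointer wrapped, say
 * cur == 5 on a 1024-dword ring, it becomes 1024 - 10 + 5 == 1019,
 * which reaches the same spot modulo the ring size.
 */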
4926 
4927 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4928 {
4929 	struct amdgpu_device *adev = ring->adev;
4930 
4931 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4932 	amdgpu_ring_write(ring, 0 |	/* src: register */
4933 				(5 << 8) |	/* dst: memory */
4934 				(1 << 20));	/* write confirm */
4935 	amdgpu_ring_write(ring, reg);
4936 	amdgpu_ring_write(ring, 0);
4937 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4938 				adev->virt.reg_val_offs * 4));
4939 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4940 				adev->virt.reg_val_offs * 4));
4941 }
4942 
4943 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4944 				    uint32_t val)
4945 {
4946 	uint32_t cmd = 0;
4947 
4948 	switch (ring->funcs->type) {
4949 	case AMDGPU_RING_TYPE_GFX:
4950 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4951 		break;
4952 	case AMDGPU_RING_TYPE_KIQ:
4953 		cmd = (1 << 16); /* no inc addr */
4954 		break;
4955 	default:
4956 		cmd = WR_CONFIRM;
4957 		break;
4958 	}
4959 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4960 	amdgpu_ring_write(ring, cmd);
4961 	amdgpu_ring_write(ring, reg);
4962 	amdgpu_ring_write(ring, 0);
4963 	amdgpu_ring_write(ring, val);
4964 }
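
/*
 * A sketch of the engine-select choices above (the KIQ literal is
 * assumed to be the "write one address" control bit): gfx rings route
 * the write through the PFP (ENGINE_SEL(1)) with write confirmation,
 * the KIQ sets bit 16 so the destination address does not
 * auto-increment, and compute rings fall back to a plain confirmed
 * write.
 */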
4965 
4966 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4967 					uint32_t val, uint32_t mask)
4968 {
4969 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4970 }
4971 
4972 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4973 						  uint32_t reg0, uint32_t reg1,
4974 						  uint32_t ref, uint32_t mask)
4975 {
4976 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4977 	struct amdgpu_device *adev = ring->adev;
4978 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4979 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4980 
4981 	if (fw_version_ok)
4982 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4983 				      ref, mask, 0x20);
4984 	else
4985 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4986 							   ref, mask);
4987 }
4988 
4989 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4990 {
4991 	struct amdgpu_device *adev = ring->adev;
4992 	uint32_t value = 0;
4993 
4994 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4995 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4996 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4997 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4998 	WREG32(mmSQ_CMD, value);
4999 }
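
/*
 * Field meanings (the CMD encoding of 0x03 as a wave kill is stated
 * here as an assumption): this broadcasts a kill to the SQs
 * (MODE 0x01) restricted by CHECK_VMID to waves owned by the given
 * VMID, so only the hung job's waves are terminated rather than the
 * whole GPU being reset.
 */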
5000 
5001 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5002 						 enum amdgpu_interrupt_state state)
5003 {
5004 	switch (state) {
5005 	case AMDGPU_IRQ_STATE_DISABLE:
5006 	case AMDGPU_IRQ_STATE_ENABLE:
5007 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5008 			       TIME_STAMP_INT_ENABLE,
5009 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5010 		break;
5011 	default:
5012 		break;
5013 	}
5014 }
5015 
5016 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5017 						     int me, int pipe,
5018 						     enum amdgpu_interrupt_state state)
5019 {
5020 	u32 mec_int_cntl, mec_int_cntl_reg;
5021 
5022 	/*
5023 	 * amdgpu controls only the first MEC. That's why this function only
5024 	 * handles the setting of interrupts for this specific MEC. All other
5025 	 * pipes' interrupts are set by amdkfd.
5026 	 */
5027 
5028 	if (me == 1) {
5029 		switch (pipe) {
5030 		case 0:
5031 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5032 			break;
5033 		case 1:
5034 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5035 			break;
5036 		case 2:
5037 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5038 			break;
5039 		case 3:
5040 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5041 			break;
5042 		default:
5043 			DRM_DEBUG("invalid pipe %d\n", pipe);
5044 			return;
5045 		}
5046 	} else {
5047 		DRM_DEBUG("invalid me %d\n", me);
5048 		return;
5049 	}
5050 
5051 	switch (state) {
5052 	case AMDGPU_IRQ_STATE_DISABLE:
5053 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5054 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5055 					     TIME_STAMP_INT_ENABLE, 0);
5056 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5057 		break;
5058 	case AMDGPU_IRQ_STATE_ENABLE:
5059 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5060 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5061 					     TIME_STAMP_INT_ENABLE, 1);
5062 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5063 		break;
5064 	default:
5065 		break;
5066 	}
5067 }
5068 
5069 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5070 					     struct amdgpu_irq_src *source,
5071 					     unsigned type,
5072 					     enum amdgpu_interrupt_state state)
5073 {
5074 	switch (state) {
5075 	case AMDGPU_IRQ_STATE_DISABLE:
5076 	case AMDGPU_IRQ_STATE_ENABLE:
5077 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5078 			       PRIV_REG_INT_ENABLE,
5079 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5080 		break;
5081 	default:
5082 		break;
5083 	}
5084 
5085 	return 0;
5086 }
5087 
5088 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5089 					      struct amdgpu_irq_src *source,
5090 					      unsigned type,
5091 					      enum amdgpu_interrupt_state state)
5092 {
5093 	switch (state) {
5094 	case AMDGPU_IRQ_STATE_DISABLE:
5095 	case AMDGPU_IRQ_STATE_ENABLE:
5096 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5097 			       PRIV_INSTR_INT_ENABLE,
5098 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
5099 	default:
5100 		break;
5101 	}
5102 
5103 	return 0;
5104 }
5105 
5106 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5107 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5108 			CP_ECC_ERROR_INT_ENABLE, 1)
5109 
5110 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5111 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5112 			CP_ECC_ERROR_INT_ENABLE, 0)
5113 
5114 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5115 					      struct amdgpu_irq_src *source,
5116 					      unsigned type,
5117 					      enum amdgpu_interrupt_state state)
5118 {
5119 	switch (state) {
5120 	case AMDGPU_IRQ_STATE_DISABLE:
5121 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5122 				CP_ECC_ERROR_INT_ENABLE, 0);
5123 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5124 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5125 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5126 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5127 		break;
5128 
5129 	case AMDGPU_IRQ_STATE_ENABLE:
5130 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5131 				CP_ECC_ERROR_INT_ENABLE, 1);
5132 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5133 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5134 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5135 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5136 		break;
5137 	default:
5138 		break;
5139 	}
5140 
5141 	return 0;
5142 }
5143 
5145 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5146 					    struct amdgpu_irq_src *src,
5147 					    unsigned type,
5148 					    enum amdgpu_interrupt_state state)
5149 {
5150 	switch (type) {
5151 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5152 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5153 		break;
5154 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5155 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5156 		break;
5157 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5158 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5159 		break;
5160 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5161 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5162 		break;
5163 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5164 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5165 		break;
5166 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5167 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5168 		break;
5169 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5170 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5171 		break;
5172 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5173 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5174 		break;
5175 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5176 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5177 		break;
5178 	default:
5179 		break;
5180 	}
5181 	return 0;
5182 }
5183 
5184 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5185 			    struct amdgpu_irq_src *source,
5186 			    struct amdgpu_iv_entry *entry)
5187 {
5188 	int i;
5189 	u8 me_id, pipe_id, queue_id;
5190 	struct amdgpu_ring *ring;
5191 
5192 	DRM_DEBUG("IH: CP EOP\n");
5193 	me_id = (entry->ring_id & 0x0c) >> 2;
5194 	pipe_id = (entry->ring_id & 0x03) >> 0;
5195 	queue_id = (entry->ring_id & 0x70) >> 4;
5196 
5197 	switch (me_id) {
5198 	case 0:
5199 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5200 		break;
5201 	case 1:
5202 	case 2:
5203 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5204 			ring = &adev->gfx.compute_ring[i];
5205 			/* Per-queue interrupt is supported for MEC starting from VI.
5206 			 * The interrupt can only be enabled/disabled per pipe instead
5207 			 * of per queue.
5208 			 */
5208 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5209 				amdgpu_fence_process(ring);
5210 		}
5211 		break;
5212 	}
5213 	return 0;
5214 }
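
/*
 * Worked example of the ring_id decode above: entry->ring_id == 0x35
 * yields me_id == (0x35 & 0x0c) >> 2 == 1, pipe_id == 0x35 & 0x03 == 1
 * and queue_id == (0x35 & 0x70) >> 4 == 3, so the EOP came from MEC1,
 * pipe 1, queue 3 and only that compute ring's fences are processed.
 */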
5215 
5216 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5217 			   struct amdgpu_iv_entry *entry)
5218 {
5219 	u8 me_id, pipe_id, queue_id;
5220 	struct amdgpu_ring *ring;
5221 	int i;
5222 
5223 	me_id = (entry->ring_id & 0x0c) >> 2;
5224 	pipe_id = (entry->ring_id & 0x03) >> 0;
5225 	queue_id = (entry->ring_id & 0x70) >> 4;
5226 
5227 	switch (me_id) {
5228 	case 0:
5229 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5230 		break;
5231 	case 1:
5232 	case 2:
5233 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5234 			ring = &adev->gfx.compute_ring[i];
5235 			if (ring->me == me_id && ring->pipe == pipe_id &&
5236 			    ring->queue == queue_id)
5237 				drm_sched_fault(&ring->sched);
5238 		}
5239 		break;
5240 	}
5241 }
5242 
5243 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5244 				 struct amdgpu_irq_src *source,
5245 				 struct amdgpu_iv_entry *entry)
5246 {
5247 	DRM_ERROR("Illegal register access in command stream\n");
5248 	gfx_v9_0_fault(adev, entry);
5249 	return 0;
5250 }
5251 
5252 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5253 				  struct amdgpu_irq_src *source,
5254 				  struct amdgpu_iv_entry *entry)
5255 {
5256 	DRM_ERROR("Illegal instruction in command stream\n");
5257 	gfx_v9_0_fault(adev, entry);
5258 	return 0;
5259 }
5260 
5261 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5262 		struct amdgpu_iv_entry *entry)
5263 {
5264 	/* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5265 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5266 	amdgpu_ras_reset_gpu(adev, 0);
5267 	return AMDGPU_RAS_UE;
5268 }
5269 
5270 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5271 				  struct amdgpu_irq_src *source,
5272 				  struct amdgpu_iv_entry *entry)
5273 {
5274 	struct ras_common_if *ras_if = adev->gfx.ras_if;
5275 	struct ras_dispatch_if ih_data = {
5276 		.entry = entry,
5277 	};
5278 
5279 	if (!ras_if)
5280 		return 0;
5281 
5282 	ih_data.head = *ras_if;
5283 
5284 	DRM_ERROR("CP ECC ERROR IRQ\n");
5285 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5286 	return 0;
5287 }
5288 
5289 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5290 	.name = "gfx_v9_0",
5291 	.early_init = gfx_v9_0_early_init,
5292 	.late_init = gfx_v9_0_late_init,
5293 	.sw_init = gfx_v9_0_sw_init,
5294 	.sw_fini = gfx_v9_0_sw_fini,
5295 	.hw_init = gfx_v9_0_hw_init,
5296 	.hw_fini = gfx_v9_0_hw_fini,
5297 	.suspend = gfx_v9_0_suspend,
5298 	.resume = gfx_v9_0_resume,
5299 	.is_idle = gfx_v9_0_is_idle,
5300 	.wait_for_idle = gfx_v9_0_wait_for_idle,
5301 	.soft_reset = gfx_v9_0_soft_reset,
5302 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
5303 	.set_powergating_state = gfx_v9_0_set_powergating_state,
5304 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
5305 };
5306 
5307 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5308 	.type = AMDGPU_RING_TYPE_GFX,
5309 	.align_mask = 0xff,
5310 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5311 	.support_64bit_ptrs = true,
5312 	.vmhub = AMDGPU_GFXHUB_0,
5313 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5314 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5315 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5316 	.emit_frame_size = /* 242 dwords maximum in total, if 16 IBs */
5317 		5 +  /* COND_EXEC */
5318 		7 +  /* PIPELINE_SYNC */
5319 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5320 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5321 		2 + /* VM_FLUSH */
5322 		8 +  /* FENCE for VM_FLUSH */
5323 		20 + /* GDS switch */
5324 		4 + /* double SWITCH_BUFFER,
5325 		       the first COND_EXEC jumps to the place just
5326 		       prior to this double SWITCH_BUFFER */
5327 		5 + /* COND_EXEC */
5328 		7 +	 /*	HDP_flush */
5329 		4 +	 /*	VGT_flush */
5330 		14 + /*	CE_META */
5331 		31 + /*	DE_META */
5332 		3 + /* CNTX_CTRL */
5333 		5 + /* HDP_INVL */
5334 		8 + 8 + /* FENCE x2 */
5335 		2, /* SWITCH_BUFFER */
5336 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
5337 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5338 	.emit_fence = gfx_v9_0_ring_emit_fence,
5339 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5340 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5341 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5342 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5343 	.test_ring = gfx_v9_0_ring_test_ring,
5344 	.test_ib = gfx_v9_0_ring_test_ib,
5345 	.insert_nop = amdgpu_ring_insert_nop,
5346 	.pad_ib = amdgpu_ring_generic_pad_ib,
5347 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
5348 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5349 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5350 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5351 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
5352 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5353 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5354 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5355 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
5356 };
5357 
5358 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5359 	.type = AMDGPU_RING_TYPE_COMPUTE,
5360 	.align_mask = 0xff,
5361 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5362 	.support_64bit_ptrs = true,
5363 	.vmhub = AMDGPU_GFXHUB_0,
5364 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5365 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5366 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5367 	.emit_frame_size =
5368 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5369 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5370 		5 + /* hdp invalidate */
5371 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5372 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5373 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5374 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5375 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5376 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5377 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
5378 	.emit_fence = gfx_v9_0_ring_emit_fence,
5379 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5380 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5381 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5382 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5383 	.test_ring = gfx_v9_0_ring_test_ring,
5384 	.test_ib = gfx_v9_0_ring_test_ib,
5385 	.insert_nop = amdgpu_ring_insert_nop,
5386 	.pad_ib = amdgpu_ring_generic_pad_ib,
5387 	.set_priority = gfx_v9_0_ring_set_priority_compute,
5388 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5389 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5390 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5391 };
5392 
5393 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5394 	.type = AMDGPU_RING_TYPE_KIQ,
5395 	.align_mask = 0xff,
5396 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5397 	.support_64bit_ptrs = true,
5398 	.vmhub = AMDGPU_GFXHUB_0,
5399 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5400 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5401 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5402 	.emit_frame_size =
5403 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5404 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5405 		5 + /* hdp invalidate */
5406 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5407 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5408 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5409 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5410 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5411 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5412 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5413 	.test_ring = gfx_v9_0_ring_test_ring,
5414 	.insert_nop = amdgpu_ring_insert_nop,
5415 	.pad_ib = amdgpu_ring_generic_pad_ib,
5416 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
5417 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5418 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5419 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5420 };
5421 
5422 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5423 {
5424 	int i;
5425 
5426 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5427 
5428 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5429 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5430 
5431 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5432 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5433 }
5434 
5435 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5436 	.set = gfx_v9_0_set_eop_interrupt_state,
5437 	.process = gfx_v9_0_eop_irq,
5438 };
5439 
5440 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5441 	.set = gfx_v9_0_set_priv_reg_fault_state,
5442 	.process = gfx_v9_0_priv_reg_irq,
5443 };
5444 
5445 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5446 	.set = gfx_v9_0_set_priv_inst_fault_state,
5447 	.process = gfx_v9_0_priv_inst_irq,
5448 };
5449 
5450 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5451 	.set = gfx_v9_0_set_cp_ecc_error_state,
5452 	.process = gfx_v9_0_cp_ecc_error_irq,
5453 };
5454 
5456 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5457 {
5458 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5459 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5460 
5461 	adev->gfx.priv_reg_irq.num_types = 1;
5462 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5463 
5464 	adev->gfx.priv_inst_irq.num_types = 1;
5465 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5466 
5467 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5468 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5469 }
5470 
5471 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5472 {
5473 	switch (adev->asic_type) {
5474 	case CHIP_VEGA10:
5475 	case CHIP_VEGA12:
5476 	case CHIP_VEGA20:
5477 	case CHIP_RAVEN:
5478 	case CHIP_ARCTURUS:
5479 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5480 		break;
5481 	default:
5482 		break;
5483 	}
5484 }
5485 
5486 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5487 {
5488 	/* init asic gds info */
5489 	switch (adev->asic_type) {
5490 	case CHIP_VEGA10:
5491 	case CHIP_VEGA12:
5492 	case CHIP_VEGA20:
5493 		adev->gds.gds_size = 0x10000;
5494 		break;
5495 	case CHIP_RAVEN:
5496 	case CHIP_ARCTURUS:
5497 		adev->gds.gds_size = 0x1000;
5498 		break;
5499 	default:
5500 		adev->gds.gds_size = 0x10000;
5501 		break;
5502 	}
5503 
5504 	switch (adev->asic_type) {
5505 	case CHIP_VEGA10:
5506 	case CHIP_VEGA20:
5507 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5508 		break;
5509 	case CHIP_VEGA12:
5510 		adev->gds.gds_compute_max_wave_id = 0x27f;
5511 		break;
5512 	case CHIP_RAVEN:
5513 		if (adev->rev_id >= 0x8)
5514 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5515 		else
5516 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5517 		break;
5518 	case CHIP_ARCTURUS:
5519 		adev->gds.gds_compute_max_wave_id = 0xfff;
5520 		break;
5521 	default:
5522 		/* this really depends on the chip */
5523 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5524 		break;
5525 	}
5526 
5527 	adev->gds.gws_size = 64;
5528 	adev->gds.oa_size = 16;
5529 }
5530 
5531 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5532 						 u32 bitmap)
5533 {
5534 	u32 data;
5535 
5536 	if (!bitmap)
5537 		return;
5538 
5539 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5540 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5541 
5542 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5543 }
5544 
5545 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5546 {
5547 	u32 data, mask;
5548 
5549 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5550 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5551 
5552 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5553 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5554 
5555 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5556 
5557 	return (~data) & mask;
5558 }
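
/*
 * Worked example (illustrative register values): with max_cu_per_sh
 * of 8 the helper builds mask == 0xff; if the combined fixed and
 * user-requested inactive-CU field reads 0x0c, the function returns
 * (~0x0c) & 0xff == 0xf3, i.e. CUs 2 and 3 are inactive and the
 * remaining six are active.
 */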
5559 
5560 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5561 				 struct amdgpu_cu_info *cu_info)
5562 {
5563 	int i, j, k, counter, active_cu_number = 0;
5564 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5565 	unsigned disable_masks[4 * 2];
5566 
5567 	if (!adev || !cu_info)
5568 		return -EINVAL;
5569 
5570 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5571 
5572 	mutex_lock(&adev->grbm_idx_mutex);
5573 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5574 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5575 			mask = 1;
5576 			ao_bitmap = 0;
5577 			counter = 0;
5578 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5579 			if (i < 4 && j < 2)
5580 				gfx_v9_0_set_user_cu_inactive_bitmap(
5581 					adev, disable_masks[i * 2 + j]);
5582 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5583 			cu_info->bitmap[i][j] = bitmap;
5584 
5585 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5586 				if (bitmap & mask) {
5587 					if (counter < adev->gfx.config.max_cu_per_sh)
5588 						ao_bitmap |= mask;
5589 					counter++;
5590 				}
5591 				mask <<= 1;
5592 			}
5593 			active_cu_number += counter;
5594 			if (i < 2 && j < 2)
5595 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5596 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5597 		}
5598 	}
5599 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5600 	mutex_unlock(&adev->grbm_idx_mutex);
5601 
5602 	cu_info->number = active_cu_number;
5603 	cu_info->ao_cu_mask = ao_cu_mask;
5604 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5605 
5606 	return 0;
5607 }
5608 
5609 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5610 {
5611 	.type = AMD_IP_BLOCK_TYPE_GFX,
5612 	.major = 9,
5613 	.minor = 0,
5614 	.rev = 0,
5615 	.funcs = &gfx_v9_0_ip_funcs,
5616 };
5617