xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 781095f903f398148cd0b646d3984234a715f29e)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31 
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34 
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40 
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45 
46 #include "uvd/uvd_5_0_d.h"
47 #include "uvd/uvd_5_0_sh_mask.h"
48 
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51 
52 #define GFX8_NUM_GFX_RINGS     1
53 #define GFX8_NUM_COMPUTE_RINGS 8
54 
55 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
56 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
58 
59 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
60 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
61 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
62 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
63 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
64 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
65 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
66 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
67 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
68 
69 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
70 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
71 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
72 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
73 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
74 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
75 
76 /* BPM SERDES CMD */
77 #define SET_BPM_SERDES_CMD    1
78 #define CLE_BPM_SERDES_CMD    0
79 
80 /* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* sentinel: count of BPM registers above */
};
89 
/*
 * Firmware images requested at init time (see gfx_v8_0_init_microcode):
 * CE/PFP/ME/MEC/RLC per supported VI ASIC, plus MEC2 where the part has a
 * second compute microengine (Carrizo, Tonga, Fiji).
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
122 
/*
 * Per-VMID GDS register offsets: {GDS base, GDS size, GWS, OA} for
 * VMIDs 0-15, indexed by VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
142 
/*
 * Tonga A11 golden register tweaks: flat { reg offset, mask, value }
 * triplets consumed by amdgpu_program_register_sequence() (see
 * gfx_v8_0_init_golden_registers).  NOTE(review): exact mask semantics
 * (read-modify-write vs. direct write for 0xffffffff) should be confirmed
 * against amdgpu_program_register_sequence().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
161 
/*
 * Tonga common golden settings: raster config, GB_ADDR_CONFIG
 * (TONGA_GB_ADDR_CONFIG_GOLDEN) and SPI CU resource reservation.
 * { reg, mask, value } triplets for amdgpu_program_register_sequence().
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
173 
/*
 * Tonga MGCG/CGCG (medium-grain / coarse-grain clock gating) init
 * sequence: { reg, mask, value } triplets.  Sets CGTT_*_CLK_CTRL
 * defaults, per-CU CGTS control registers, and finally the
 * RLC_CGCG_CGLS_CTRL / CP_MEM_SLP_CNTL gating enables.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
252 
/*
 * Fiji common golden settings: raster config, GB_ADDR_CONFIG and SPI CU
 * resource reservation.  { reg, mask, value } triplets for
 * amdgpu_program_register_sequence().
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
266 
/*
 * Fiji A10 golden register tweaks: { reg, mask, value } triplets for
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
281 
/*
 * Fiji MGCG/CGCG clock-gating init sequence: { reg, mask, value }
 * triplets.  Same CGTT_*_CLK_CTRL defaults as Tonga but without the
 * per-CU CGTS programming.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
320 
/*
 * Iceland (Topaz) A11 golden register tweaks: { reg, mask, value }
 * triplets for amdgpu_program_register_sequence().
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
339 
/*
 * Iceland (Topaz) common golden settings: raster config, GB_ADDR_CONFIG
 * (TOPAZ_GB_ADDR_CONFIG_GOLDEN) and SPI CU resource reservation.
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
351 
/*
 * Iceland (Topaz) MGCG/CGCG clock-gating init sequence: { reg, mask,
 * value } triplets.  Per-CU CGTS programming covers CU0-CU5 only (fewer
 * CUs than Tonga/Carrizo); no CP_MEM_SLP_CNTL entry.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
419 
/*
 * Carrizo A11 golden register tweaks: { reg, mask, value } triplets for
 * amdgpu_program_register_sequence().
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
433 
/*
 * Carrizo common golden settings: raster config, GB_ADDR_CONFIG
 * (CARRIZO_GB_ADDR_CONFIG_GOLDEN) and SPI CU resource reservation.
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
445 
/*
 * Carrizo MGCG/CGCG clock-gating init sequence: { reg, mask, value }
 * triplets, including per-CU CGTS programming for CU0-CU7.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
524 
525 static const u32 stoney_golden_settings_a11[] =
526 {
527 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
528 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
529 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
530 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
531 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
532 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
533   	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
534 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
535 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
536 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
537 };
538 
/*
 * Stoney common golden settings: raster config, GB_ADDR_CONFIG and SPI
 * CU resource reservation.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
550 
/*
 * Stoney MGCG/CGCG clock-gating init sequence (much shorter than the
 * discrete parts: no per-block CGTT_*_CLK_CTRL programming).
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
560 
561 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
562 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
563 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
564 
565 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
566 {
567 	switch (adev->asic_type) {
568 	case CHIP_TOPAZ:
569 		amdgpu_program_register_sequence(adev,
570 						 iceland_mgcg_cgcg_init,
571 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
572 		amdgpu_program_register_sequence(adev,
573 						 golden_settings_iceland_a11,
574 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
575 		amdgpu_program_register_sequence(adev,
576 						 iceland_golden_common_all,
577 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
578 		break;
579 	case CHIP_FIJI:
580 		amdgpu_program_register_sequence(adev,
581 						 fiji_mgcg_cgcg_init,
582 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
583 		amdgpu_program_register_sequence(adev,
584 						 golden_settings_fiji_a10,
585 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
586 		amdgpu_program_register_sequence(adev,
587 						 fiji_golden_common_all,
588 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
589 		break;
590 
591 	case CHIP_TONGA:
592 		amdgpu_program_register_sequence(adev,
593 						 tonga_mgcg_cgcg_init,
594 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
595 		amdgpu_program_register_sequence(adev,
596 						 golden_settings_tonga_a11,
597 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
598 		amdgpu_program_register_sequence(adev,
599 						 tonga_golden_common_all,
600 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
601 		break;
602 	case CHIP_CARRIZO:
603 		amdgpu_program_register_sequence(adev,
604 						 cz_mgcg_cgcg_init,
605 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
606 		amdgpu_program_register_sequence(adev,
607 						 cz_golden_settings_a11,
608 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
609 		amdgpu_program_register_sequence(adev,
610 						 cz_golden_common_all,
611 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
612 		break;
613 	case CHIP_STONEY:
614 		amdgpu_program_register_sequence(adev,
615 						 stoney_mgcg_cgcg_init,
616 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
617 		amdgpu_program_register_sequence(adev,
618 						 stoney_golden_settings_a11,
619 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
620 		amdgpu_program_register_sequence(adev,
621 						 stoney_golden_common_all,
622 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
623 		break;
624 	default:
625 		break;
626 	}
627 }
628 
629 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
630 {
631 	int i;
632 
633 	adev->gfx.scratch.num_reg = 7;
634 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
635 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
636 		adev->gfx.scratch.free[i] = true;
637 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
638 	}
639 }
640 
/**
 * gfx_v8_0_ring_test_ring - basic CP ring sanity test
 * @ring: ring to test
 *
 * Allocates a scratch register, seeds it with 0xCAFEDEAD, then submits a
 * SET_UCONFIG_REG packet on the ring that writes 0xDEADBEEF into it and
 * polls (up to adev->usec_timeout microseconds) until the CP has executed
 * the write.  The scratch register is always freed before returning.
 *
 * Returns 0 on success, -EINVAL on timeout, or a negative error code if
 * the scratch register or ring could not be obtained.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch register with a value the packet must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* 3-dword packet: write 0xDEADBEEF to the scratch register via CP */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-poll until the CP has performed the register write */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
684 
/**
 * gfx_v8_0_ring_test_ib - test indirect buffer submission on a ring
 * @ring: ring to test
 *
 * Same idea as the ring test, but the scratch-register write goes through
 * an indirect buffer: seeds the scratch register with 0xCAFEDEAD, builds a
 * 3-dword IB that writes 0xDEADBEEF to it, schedules the IB, waits for its
 * fence, then polls the register.  Cleanup uses goto labels so the IB,
 * fence and scratch register are released on every path.
 *
 * Returns 0 on success, -EINVAL on timeout, or a negative error code from
 * scratch/IB allocation, scheduling, or the fence wait.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch register with a value the IB must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		goto err1;
	}
	/* IB payload: SET_UCONFIG_REG writing 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
			       NULL, &f);
	if (r)
		goto err2;

	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto err2;
	}
	/* fence signalled; poll in case the register write is still in flight */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
			 ring->idx, i);
		goto err2;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
744 
745 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
746 {
747 	const char *chip_name;
748 	char fw_name[30];
749 	int err;
750 	struct amdgpu_firmware_info *info = NULL;
751 	const struct common_firmware_header *header = NULL;
752 	const struct gfx_firmware_header_v1_0 *cp_hdr;
753 
754 	DRM_DEBUG("\n");
755 
756 	switch (adev->asic_type) {
757 	case CHIP_TOPAZ:
758 		chip_name = "topaz";
759 		break;
760 	case CHIP_TONGA:
761 		chip_name = "tonga";
762 		break;
763 	case CHIP_CARRIZO:
764 		chip_name = "carrizo";
765 		break;
766 	case CHIP_FIJI:
767 		chip_name = "fiji";
768 		break;
769 	case CHIP_STONEY:
770 		chip_name = "stoney";
771 		break;
772 	default:
773 		BUG();
774 	}
775 
776 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
777 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
778 	if (err)
779 		goto out;
780 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
781 	if (err)
782 		goto out;
783 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
784 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
785 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
786 
787 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
788 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
789 	if (err)
790 		goto out;
791 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
792 	if (err)
793 		goto out;
794 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
795 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
796 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
797 
798 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
799 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
800 	if (err)
801 		goto out;
802 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
803 	if (err)
804 		goto out;
805 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
806 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
807 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
808 
809 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
810 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
811 	if (err)
812 		goto out;
813 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
814 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
815 	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
816 	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
817 
818 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
819 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
820 	if (err)
821 		goto out;
822 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
823 	if (err)
824 		goto out;
825 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
826 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
827 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
828 
829 	if ((adev->asic_type != CHIP_STONEY) &&
830 	    (adev->asic_type != CHIP_TOPAZ)) {
831 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
832 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
833 		if (!err) {
834 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
835 			if (err)
836 				goto out;
837 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
838 				adev->gfx.mec2_fw->data;
839 			adev->gfx.mec2_fw_version =
840 				le32_to_cpu(cp_hdr->header.ucode_version);
841 			adev->gfx.mec2_feature_version =
842 				le32_to_cpu(cp_hdr->ucode_feature_version);
843 		} else {
844 			err = 0;
845 			adev->gfx.mec2_fw = NULL;
846 		}
847 	}
848 
849 	if (adev->firmware.smu_load) {
850 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
851 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
852 		info->fw = adev->gfx.pfp_fw;
853 		header = (const struct common_firmware_header *)info->fw->data;
854 		adev->firmware.fw_size +=
855 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
856 
857 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
858 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
859 		info->fw = adev->gfx.me_fw;
860 		header = (const struct common_firmware_header *)info->fw->data;
861 		adev->firmware.fw_size +=
862 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
863 
864 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
865 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
866 		info->fw = adev->gfx.ce_fw;
867 		header = (const struct common_firmware_header *)info->fw->data;
868 		adev->firmware.fw_size +=
869 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
870 
871 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
872 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
873 		info->fw = adev->gfx.rlc_fw;
874 		header = (const struct common_firmware_header *)info->fw->data;
875 		adev->firmware.fw_size +=
876 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
877 
878 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
879 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
880 		info->fw = adev->gfx.mec_fw;
881 		header = (const struct common_firmware_header *)info->fw->data;
882 		adev->firmware.fw_size +=
883 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
884 
885 		if (adev->gfx.mec2_fw) {
886 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
887 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
888 			info->fw = adev->gfx.mec2_fw;
889 			header = (const struct common_firmware_header *)info->fw->data;
890 			adev->firmware.fw_size +=
891 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
892 		}
893 
894 	}
895 
896 out:
897 	if (err) {
898 		dev_err(adev->dev,
899 			"gfx8: Failed to load firmware \"%s\"\n",
900 			fw_name);
901 		release_firmware(adev->gfx.pfp_fw);
902 		adev->gfx.pfp_fw = NULL;
903 		release_firmware(adev->gfx.me_fw);
904 		adev->gfx.me_fw = NULL;
905 		release_firmware(adev->gfx.ce_fw);
906 		adev->gfx.ce_fw = NULL;
907 		release_firmware(adev->gfx.rlc_fw);
908 		adev->gfx.rlc_fw = NULL;
909 		release_firmware(adev->gfx.mec_fw);
910 		adev->gfx.mec_fw = NULL;
911 		release_firmware(adev->gfx.mec2_fw);
912 		adev->gfx.mec2_fw = NULL;
913 	}
914 	return err;
915 }
916 
917 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
918 {
919 	int r;
920 
921 	if (adev->gfx.mec.hpd_eop_obj) {
922 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
923 		if (unlikely(r != 0))
924 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
925 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
926 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
927 
928 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
929 		adev->gfx.mec.hpd_eop_obj = NULL;
930 	}
931 }
932 
933 #define MEC_HPD_SIZE 2048
934 
935 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
936 {
937 	int r;
938 	u32 *hpd;
939 
940 	/*
941 	 * we assign only 1 pipe because all other pipes will
942 	 * be handled by KFD
943 	 */
944 	adev->gfx.mec.num_mec = 1;
945 	adev->gfx.mec.num_pipe = 1;
946 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
947 
948 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
949 		r = amdgpu_bo_create(adev,
950 				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
951 				     PAGE_SIZE, true,
952 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
953 				     &adev->gfx.mec.hpd_eop_obj);
954 		if (r) {
955 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
956 			return r;
957 		}
958 	}
959 
960 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
961 	if (unlikely(r != 0)) {
962 		gfx_v8_0_mec_fini(adev);
963 		return r;
964 	}
965 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
966 			  &adev->gfx.mec.hpd_eop_gpu_addr);
967 	if (r) {
968 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
969 		gfx_v8_0_mec_fini(adev);
970 		return r;
971 	}
972 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
973 	if (r) {
974 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
975 		gfx_v8_0_mec_fini(adev);
976 		return r;
977 	}
978 
979 	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
980 
981 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
982 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
983 
984 	return 0;
985 }
986 
/*
 * Raw GCN3 shader binary used by gfx_v8_0_do_edc_gpr_workarounds() to
 * initialize VGPRs as part of the Carrizo EDC workaround.  Uploaded
 * verbatim into an IB and dispatched as a compute shader; do not edit
 * individual dwords.  (Opcode-level meaning not decoded here — these
 * appear to be v_mov-style register writes followed by s_barrier/
 * s_endpgm, but confirm against the GCN3 ISA manual before relying
 * on that.)
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1023 
/*
 * Raw GCN3 shader binary used by gfx_v8_0_do_edc_gpr_workarounds() to
 * initialize SGPRs (dispatched twice, with the sgpr1/sgpr2 register
 * sets below selecting different SE thread masks).  Uploaded verbatim
 * into an IB; do not edit individual dwords.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1048 
/*
 * Register/value pairs programmed (via SET_SH_REG packets) before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds().  Layout is
 * { reg, value, reg, value, ... } — the consumer iterates in steps
 * of 2, so keep the pairing intact.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1068 
/*
 * Register/value pairs for the first SGPR-init dispatch (thread mgmt
 * mask 0x0f selects a different CU subset than sgpr2_init_regs).
 * Same { reg, value } pairing convention as vgpr_init_regs.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1088 
/*
 * Register/value pairs for the second SGPR-init dispatch (thread mgmt
 * mask 0xf0 — the complementary CU subset to sgpr1_init_regs).
 * Same { reg, value } pairing convention as vgpr_init_regs.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1108 
/*
 * SEC/DED (single-error-correct / double-error-detect) EDC counter
 * registers.  gfx_v8_0_do_edc_gpr_workarounds() reads each one once
 * at the end of the workaround to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1137 
/*
 * gfx_v8_0_do_edc_gpr_workarounds - Carrizo EDC GPR init workaround
 *
 * Builds a single IB that dispatches three tiny compute shaders (one
 * VGPR-init pass and two complementary SGPR-init passes), runs it on
 * compute ring 0 and waits for completion, then re-enables EDC via
 * GB_EDC_MODE / CC_GC_EDC_CONFIG and reads back the SEC/DED counter
 * registers to clear them.
 *
 * No-op (returns 0) on non-Carrizo ASICs or when the compute ring is
 * not ready.  Returns a negative error code if IB allocation,
 * submission or the fence wait fails.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* IB layout: three (SET_SH_REG state + PGM address + dispatch +
	 * partial flush) sequences, then the two shader binaries at
	 * 256-byte-aligned offsets */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
			       NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED_MODE/PROP_FED on top of the saved value */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);

	return r;
}
1301 
/*
 * gfx_v8_0_gpu_early_init - fill in per-ASIC gfx configuration
 *
 * Sets adev->gfx.config (shader engine / pipe / CU counts, FIFO sizes)
 * for the detected ASIC, derives the memory row size from the memory
 * controller registers (fuse registers on APUs, MC_ARB_RAMCFG on
 * dGPUs), and stores the final GB_ADDR_CONFIG golden value.
 */
static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count is fused per SKU; map PCI revision to variant */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count is fused per SKU; map PCI revision to variant */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * presumably a leftover; confirm before removing the read. */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
1527 
/*
 * gfx_v8_0_sw_init - software-side initialization of the GFX IP block
 *
 * Registers the EOP / privileged-reg / privileged-instruction interrupt
 * sources, loads the microcode, allocates the MEC HPD buffer, creates
 * the gfx and compute rings, reserves the GDS/GWS/OA buffer objects
 * and finally fills in the per-ASIC config via gfx_v8_0_gpu_early_init.
 *
 * Returns 0 on success or a negative error code (earlier successfully
 * acquired resources are released by the caller via sw_fini).
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;	/* 8 queues per pipe */
		ring->queue = i % 8;
		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v8_0_gpu_early_init(adev);

	return 0;
}
1639 
1640 static int gfx_v8_0_sw_fini(void *handle)
1641 {
1642 	int i;
1643 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1644 
1645 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1646 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1647 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1648 
1649 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1650 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1651 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1652 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1653 
1654 	gfx_v8_0_mec_fini(adev);
1655 
1656 	return 0;
1657 }
1658 
1659 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1660 {
1661 	uint32_t *modearray, *mod2array;
1662 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1663 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1664 	u32 reg_offset;
1665 
1666 	modearray = adev->gfx.config.tile_mode_array;
1667 	mod2array = adev->gfx.config.macrotile_mode_array;
1668 
1669 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1670 		modearray[reg_offset] = 0;
1671 
1672 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1673 		mod2array[reg_offset] = 0;
1674 
1675 	switch (adev->asic_type) {
1676 	case CHIP_TOPAZ:
1677 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1678 				PIPE_CONFIG(ADDR_SURF_P2) |
1679 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1680 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1681 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1682 				PIPE_CONFIG(ADDR_SURF_P2) |
1683 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1684 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1685 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1686 				PIPE_CONFIG(ADDR_SURF_P2) |
1687 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1688 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1689 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1690 				PIPE_CONFIG(ADDR_SURF_P2) |
1691 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1692 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1693 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1694 				PIPE_CONFIG(ADDR_SURF_P2) |
1695 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1696 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1697 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1698 				PIPE_CONFIG(ADDR_SURF_P2) |
1699 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1700 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1701 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1702 				PIPE_CONFIG(ADDR_SURF_P2) |
1703 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1704 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1705 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1706 				PIPE_CONFIG(ADDR_SURF_P2));
1707 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1708 				PIPE_CONFIG(ADDR_SURF_P2) |
1709 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1710 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1711 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1712 				 PIPE_CONFIG(ADDR_SURF_P2) |
1713 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1714 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1715 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1716 				 PIPE_CONFIG(ADDR_SURF_P2) |
1717 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1718 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1719 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1720 				 PIPE_CONFIG(ADDR_SURF_P2) |
1721 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1722 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1723 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1724 				 PIPE_CONFIG(ADDR_SURF_P2) |
1725 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1726 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1727 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1728 				 PIPE_CONFIG(ADDR_SURF_P2) |
1729 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1730 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1731 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1732 				 PIPE_CONFIG(ADDR_SURF_P2) |
1733 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1734 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1735 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1736 				 PIPE_CONFIG(ADDR_SURF_P2) |
1737 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1738 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1739 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1740 				 PIPE_CONFIG(ADDR_SURF_P2) |
1741 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1742 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1743 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1744 				 PIPE_CONFIG(ADDR_SURF_P2) |
1745 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1746 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1747 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1748 				 PIPE_CONFIG(ADDR_SURF_P2) |
1749 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1750 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1751 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1752 				 PIPE_CONFIG(ADDR_SURF_P2) |
1753 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1754 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1755 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1756 				 PIPE_CONFIG(ADDR_SURF_P2) |
1757 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1758 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1759 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1760 				 PIPE_CONFIG(ADDR_SURF_P2) |
1761 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1762 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1763 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1764 				 PIPE_CONFIG(ADDR_SURF_P2) |
1765 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1766 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1767 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1768 				 PIPE_CONFIG(ADDR_SURF_P2) |
1769 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1770 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1771 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1772 				 PIPE_CONFIG(ADDR_SURF_P2) |
1773 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1774 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1775 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1776 				 PIPE_CONFIG(ADDR_SURF_P2) |
1777 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1778 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1779 
1780 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1781 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1782 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1783 				NUM_BANKS(ADDR_SURF_8_BANK));
1784 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1785 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1786 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1787 				NUM_BANKS(ADDR_SURF_8_BANK));
1788 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1789 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1790 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1791 				NUM_BANKS(ADDR_SURF_8_BANK));
1792 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1793 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1794 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1795 				NUM_BANKS(ADDR_SURF_8_BANK));
1796 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1797 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1798 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1799 				NUM_BANKS(ADDR_SURF_8_BANK));
1800 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1801 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1802 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1803 				NUM_BANKS(ADDR_SURF_8_BANK));
1804 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1805 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1806 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1807 				NUM_BANKS(ADDR_SURF_8_BANK));
1808 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1809 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1810 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1811 				NUM_BANKS(ADDR_SURF_16_BANK));
1812 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1813 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1814 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1815 				NUM_BANKS(ADDR_SURF_16_BANK));
1816 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1817 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1818 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1819 				 NUM_BANKS(ADDR_SURF_16_BANK));
1820 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1821 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1822 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1823 				 NUM_BANKS(ADDR_SURF_16_BANK));
1824 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1825 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1826 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1827 				 NUM_BANKS(ADDR_SURF_16_BANK));
1828 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1829 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1830 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1831 				 NUM_BANKS(ADDR_SURF_16_BANK));
1832 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1833 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1834 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1835 				 NUM_BANKS(ADDR_SURF_8_BANK));
1836 
1837 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1838 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1839 			    reg_offset != 23)
1840 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1841 
1842 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1843 			if (reg_offset != 7)
1844 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1845 
1846 		break;
1847 	case CHIP_FIJI:
1848 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1849 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1850 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1851 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1852 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1853 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1854 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1855 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1856 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1857 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1858 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1859 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1860 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1861 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1862 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1863 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1864 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1865 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1866 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1867 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1868 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1869 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1870 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1871 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1872 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1873 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1874 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1875 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1876 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1877 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1878 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1879 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1880 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1881 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1882 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1883 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1884 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1885 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1886 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1887 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1888 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1889 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1890 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1891 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1892 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1893 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1894 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1895 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1896 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1897 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1898 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1899 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1900 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1901 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1902 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1903 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1904 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1905 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1906 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1907 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1908 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1909 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1910 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1911 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1912 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1913 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1914 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1915 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1916 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1917 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1918 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1919 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1920 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1921 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1922 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1923 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1924 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1925 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1926 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1927 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1928 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1929 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1930 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1931 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1932 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1933 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1934 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1935 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1936 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1937 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1938 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1939 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1940 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1941 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1942 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1943 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1944 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1945 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1946 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1947 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1948 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1949 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1950 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1951 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1952 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1953 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1954 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1955 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1956 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1957 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1958 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1959 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1960 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1961 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1962 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1963 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1964 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1965 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1966 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1967 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1968 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1969 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1970 
1971 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1972 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1973 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1974 				NUM_BANKS(ADDR_SURF_8_BANK));
1975 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1976 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1977 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1978 				NUM_BANKS(ADDR_SURF_8_BANK));
1979 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1980 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1981 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1982 				NUM_BANKS(ADDR_SURF_8_BANK));
1983 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1986 				NUM_BANKS(ADDR_SURF_8_BANK));
1987 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1988 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1989 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1990 				NUM_BANKS(ADDR_SURF_8_BANK));
1991 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1992 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1993 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1994 				NUM_BANKS(ADDR_SURF_8_BANK));
1995 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1996 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1997 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1998 				NUM_BANKS(ADDR_SURF_8_BANK));
1999 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2000 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2001 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2002 				NUM_BANKS(ADDR_SURF_8_BANK));
2003 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2004 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2005 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2006 				NUM_BANKS(ADDR_SURF_8_BANK));
2007 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2008 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2009 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2010 				 NUM_BANKS(ADDR_SURF_8_BANK));
2011 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2012 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2013 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2014 				 NUM_BANKS(ADDR_SURF_8_BANK));
2015 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2016 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2017 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2018 				 NUM_BANKS(ADDR_SURF_8_BANK));
2019 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2020 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2021 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2022 				 NUM_BANKS(ADDR_SURF_8_BANK));
2023 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2024 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2025 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2026 				 NUM_BANKS(ADDR_SURF_4_BANK));
2027 
2028 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2029 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2030 
2031 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2032 			if (reg_offset != 7)
2033 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2034 
2035 		break;
2036 	case CHIP_TONGA:
2037 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2038 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2039 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2040 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2041 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2043 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2044 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2045 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2047 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2048 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2049 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2050 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2051 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2052 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2053 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2054 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2055 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2056 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2057 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2058 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2059 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2060 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2061 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2062 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2063 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2064 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2065 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2066 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2067 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2068 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2069 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2070 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2071 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2072 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2073 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2074 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2075 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2076 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2077 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2078 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2080 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2081 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2082 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2083 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2085 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2086 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2087 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2088 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2089 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2091 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2092 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2093 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2094 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2095 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2096 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2097 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2098 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2100 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2101 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2102 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2103 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2104 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2105 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2106 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2107 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2108 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2109 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2110 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2111 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2112 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2113 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2114 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2115 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2116 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2117 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2118 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2119 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2120 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2121 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2122 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2123 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2124 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2125 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2126 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2127 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2128 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2129 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2130 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2131 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2132 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2133 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2134 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2135 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2136 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2137 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2138 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2139 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2140 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2141 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2142 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2143 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2144 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2145 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2146 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2147 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2149 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2150 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2151 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2152 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2153 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2154 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2155 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2156 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2157 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2158 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2159 
2160 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2162 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2163 				NUM_BANKS(ADDR_SURF_16_BANK));
2164 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2165 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2166 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2167 				NUM_BANKS(ADDR_SURF_16_BANK));
2168 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2169 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2170 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2171 				NUM_BANKS(ADDR_SURF_16_BANK));
2172 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2173 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2174 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2175 				NUM_BANKS(ADDR_SURF_16_BANK));
2176 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2178 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2179 				NUM_BANKS(ADDR_SURF_16_BANK));
2180 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2181 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2182 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2183 				NUM_BANKS(ADDR_SURF_16_BANK));
2184 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2186 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2187 				NUM_BANKS(ADDR_SURF_16_BANK));
2188 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2190 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2191 				NUM_BANKS(ADDR_SURF_16_BANK));
2192 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2193 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2194 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2195 				NUM_BANKS(ADDR_SURF_16_BANK));
2196 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2197 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2198 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2199 				 NUM_BANKS(ADDR_SURF_16_BANK));
2200 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2201 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2202 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2203 				 NUM_BANKS(ADDR_SURF_16_BANK));
2204 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2205 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2206 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2207 				 NUM_BANKS(ADDR_SURF_8_BANK));
2208 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2209 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2210 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2211 				 NUM_BANKS(ADDR_SURF_4_BANK));
2212 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2213 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2214 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2215 				 NUM_BANKS(ADDR_SURF_4_BANK));
2216 
2217 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2218 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2219 
2220 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2221 			if (reg_offset != 7)
2222 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2223 
2224 		break;
2225 	case CHIP_STONEY:
2226 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2227 				PIPE_CONFIG(ADDR_SURF_P2) |
2228 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2229 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2230 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 				PIPE_CONFIG(ADDR_SURF_P2) |
2232 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2233 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2234 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2235 				PIPE_CONFIG(ADDR_SURF_P2) |
2236 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2237 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2238 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2239 				PIPE_CONFIG(ADDR_SURF_P2) |
2240 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2241 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2242 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 				PIPE_CONFIG(ADDR_SURF_P2) |
2244 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2245 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2246 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2247 				PIPE_CONFIG(ADDR_SURF_P2) |
2248 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2249 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2251 				PIPE_CONFIG(ADDR_SURF_P2) |
2252 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2253 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2254 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2255 				PIPE_CONFIG(ADDR_SURF_P2));
2256 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2257 				PIPE_CONFIG(ADDR_SURF_P2) |
2258 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2259 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2260 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 				 PIPE_CONFIG(ADDR_SURF_P2) |
2262 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2263 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2264 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2265 				 PIPE_CONFIG(ADDR_SURF_P2) |
2266 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2267 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2268 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269 				 PIPE_CONFIG(ADDR_SURF_P2) |
2270 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2271 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2272 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 				 PIPE_CONFIG(ADDR_SURF_P2) |
2274 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2275 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2277 				 PIPE_CONFIG(ADDR_SURF_P2) |
2278 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2279 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2280 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2281 				 PIPE_CONFIG(ADDR_SURF_P2) |
2282 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2283 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2284 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2285 				 PIPE_CONFIG(ADDR_SURF_P2) |
2286 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2288 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2289 				 PIPE_CONFIG(ADDR_SURF_P2) |
2290 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2291 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2292 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2293 				 PIPE_CONFIG(ADDR_SURF_P2) |
2294 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2295 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2296 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2297 				 PIPE_CONFIG(ADDR_SURF_P2) |
2298 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2299 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2300 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2301 				 PIPE_CONFIG(ADDR_SURF_P2) |
2302 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2303 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2304 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2305 				 PIPE_CONFIG(ADDR_SURF_P2) |
2306 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2307 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2308 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2309 				 PIPE_CONFIG(ADDR_SURF_P2) |
2310 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2311 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2312 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2313 				 PIPE_CONFIG(ADDR_SURF_P2) |
2314 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2315 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2316 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2317 				 PIPE_CONFIG(ADDR_SURF_P2) |
2318 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2319 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 				 PIPE_CONFIG(ADDR_SURF_P2) |
2322 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2323 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2324 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2325 				 PIPE_CONFIG(ADDR_SURF_P2) |
2326 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2327 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2328 
2329 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2330 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2331 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2332 				NUM_BANKS(ADDR_SURF_8_BANK));
2333 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2334 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2335 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2336 				NUM_BANKS(ADDR_SURF_8_BANK));
2337 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2338 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2339 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2340 				NUM_BANKS(ADDR_SURF_8_BANK));
2341 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2342 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2343 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2344 				NUM_BANKS(ADDR_SURF_8_BANK));
2345 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2346 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2347 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2348 				NUM_BANKS(ADDR_SURF_8_BANK));
2349 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2351 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352 				NUM_BANKS(ADDR_SURF_8_BANK));
2353 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2355 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2356 				NUM_BANKS(ADDR_SURF_8_BANK));
2357 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2358 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2359 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360 				NUM_BANKS(ADDR_SURF_16_BANK));
2361 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2362 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2363 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2364 				NUM_BANKS(ADDR_SURF_16_BANK));
2365 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2366 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2367 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2368 				 NUM_BANKS(ADDR_SURF_16_BANK));
2369 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2370 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2371 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2372 				 NUM_BANKS(ADDR_SURF_16_BANK));
2373 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2375 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2376 				 NUM_BANKS(ADDR_SURF_16_BANK));
2377 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2378 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2379 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2380 				 NUM_BANKS(ADDR_SURF_16_BANK));
2381 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2382 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2383 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2384 				 NUM_BANKS(ADDR_SURF_8_BANK));
2385 
2386 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2387 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2388 			    reg_offset != 23)
2389 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2390 
2391 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2392 			if (reg_offset != 7)
2393 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2394 
2395 		break;
2396 	default:
2397 		dev_warn(adev->dev,
2398 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2399 			 adev->asic_type);
2400 
2401 	case CHIP_CARRIZO:
2402 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 				PIPE_CONFIG(ADDR_SURF_P2) |
2404 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2405 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 				PIPE_CONFIG(ADDR_SURF_P2) |
2408 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2409 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2410 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411 				PIPE_CONFIG(ADDR_SURF_P2) |
2412 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2413 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2414 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2415 				PIPE_CONFIG(ADDR_SURF_P2) |
2416 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2417 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2418 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419 				PIPE_CONFIG(ADDR_SURF_P2) |
2420 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2421 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2422 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2423 				PIPE_CONFIG(ADDR_SURF_P2) |
2424 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2425 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2426 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2427 				PIPE_CONFIG(ADDR_SURF_P2) |
2428 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2429 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2430 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2431 				PIPE_CONFIG(ADDR_SURF_P2));
2432 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2433 				PIPE_CONFIG(ADDR_SURF_P2) |
2434 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2435 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 				 PIPE_CONFIG(ADDR_SURF_P2) |
2438 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2439 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 				 PIPE_CONFIG(ADDR_SURF_P2) |
2442 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2443 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2445 				 PIPE_CONFIG(ADDR_SURF_P2) |
2446 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2449 				 PIPE_CONFIG(ADDR_SURF_P2) |
2450 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2452 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2453 				 PIPE_CONFIG(ADDR_SURF_P2) |
2454 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2456 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2457 				 PIPE_CONFIG(ADDR_SURF_P2) |
2458 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2459 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2460 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2461 				 PIPE_CONFIG(ADDR_SURF_P2) |
2462 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2463 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2464 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2465 				 PIPE_CONFIG(ADDR_SURF_P2) |
2466 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2467 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2468 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2469 				 PIPE_CONFIG(ADDR_SURF_P2) |
2470 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2471 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2472 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2473 				 PIPE_CONFIG(ADDR_SURF_P2) |
2474 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2475 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2476 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2477 				 PIPE_CONFIG(ADDR_SURF_P2) |
2478 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2479 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2480 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2481 				 PIPE_CONFIG(ADDR_SURF_P2) |
2482 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2484 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2485 				 PIPE_CONFIG(ADDR_SURF_P2) |
2486 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2487 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2488 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2489 				 PIPE_CONFIG(ADDR_SURF_P2) |
2490 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2491 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2492 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2493 				 PIPE_CONFIG(ADDR_SURF_P2) |
2494 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2495 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497 				 PIPE_CONFIG(ADDR_SURF_P2) |
2498 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2499 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2500 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2501 				 PIPE_CONFIG(ADDR_SURF_P2) |
2502 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2503 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2504 
2505 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2508 				NUM_BANKS(ADDR_SURF_8_BANK));
2509 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2512 				NUM_BANKS(ADDR_SURF_8_BANK));
2513 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2515 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2516 				NUM_BANKS(ADDR_SURF_8_BANK));
2517 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2520 				NUM_BANKS(ADDR_SURF_8_BANK));
2521 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2524 				NUM_BANKS(ADDR_SURF_8_BANK));
2525 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2527 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2528 				NUM_BANKS(ADDR_SURF_8_BANK));
2529 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2531 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2532 				NUM_BANKS(ADDR_SURF_8_BANK));
2533 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2534 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2535 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2536 				NUM_BANKS(ADDR_SURF_16_BANK));
2537 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2538 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2540 				NUM_BANKS(ADDR_SURF_16_BANK));
2541 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2542 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2543 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2544 				 NUM_BANKS(ADDR_SURF_16_BANK));
2545 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2546 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2547 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2548 				 NUM_BANKS(ADDR_SURF_16_BANK));
2549 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2551 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2552 				 NUM_BANKS(ADDR_SURF_16_BANK));
2553 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2555 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2556 				 NUM_BANKS(ADDR_SURF_16_BANK));
2557 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2560 				 NUM_BANKS(ADDR_SURF_8_BANK));
2561 
2562 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2563 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2564 			    reg_offset != 23)
2565 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2566 
2567 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2568 			if (reg_offset != 7)
2569 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2570 
2571 		break;
2572 	}
2573 }
2574 
/* Build a mask with the low @bit_width bits set (e.g. 4 -> 0xf). */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	u32 mask = 0xffffffffU;

	/* widths >= 32 saturate to an all-ones 32-bit mask */
	if (bit_width < 32)
		mask = (u32)((1ULL << bit_width) - 1);

	return mask;
}
2579 
2580 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2581 {
2582 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2583 
2584 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2585 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2586 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2587 	} else if (se_num == 0xffffffff) {
2588 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2589 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2590 	} else if (sh_num == 0xffffffff) {
2591 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2592 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2593 	} else {
2594 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2595 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2596 	}
2597 	WREG32(mmGRBM_GFX_INDEX, data);
2598 }
2599 
2600 static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
2601 				    u32 max_rb_num_per_se,
2602 				    u32 sh_per_se)
2603 {
2604 	u32 data, mask;
2605 
2606 	data = RREG32(mmCC_RB_BACKEND_DISABLE);
2607 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2608 
2609 	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2610 
2611 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2612 
2613 	mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
2614 
2615 	return data & mask;
2616 }
2617 
/*
 * Discover which render backends are disabled across all SEs/SHs,
 * publish the resulting enable mask in adev->gfx.config, and program
 * PA_SC_RASTER_CONFIG per shader engine to route packers around the
 * disabled RBs.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
			      u32 se_num, u32 sh_per_se,
			      u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* walk every SE/SH and pack each one's disabled-RB bits into one word */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			data = gfx_v8_0_get_rb_disabled(adev,
					      max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) *
						 RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast mode before releasing the index lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* invert the disabled bitmap into an enabled bitmap */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	adev->gfx.config.backend_enable_mask = enabled_rbs;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		data = RREG32(mmPA_SC_RASTER_CONFIG);
		/* consume enabled_rbs two bits at a time and pick an RB map
		 * for each packer; note enabled_rbs is shifted destructively
		 * below, so backend_enable_mask was saved first */
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= (RASTER_CONFIG_RB_MAP_3 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				else
					data |= (RASTER_CONFIG_RB_MAP_0 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 <<
					 (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(mmPA_SC_RASTER_CONFIG, data);
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
2684 
2685 /**
2686  * gfx_v8_0_init_compute_vmid - gart enable
2687  *
2688  * @rdev: amdgpu_device pointer
2689  *
2690  * Initialize compute vmid sh_mem registers
2691  *
2692  */
/* aperture base selector replicated into both halves of SH_MEM_BASES */
#define DEFAULT_SH_MEM_BASES	(0x6000)
/* VMIDs 8..15 are reserved for compute (KFD); exclusive upper bound below */
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default mtype, private apertures go through the ATC */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit — presumably leaves APE1 unused; confirm */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* back to VMID 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2729 
/*
 * One-time global GFX block initialization: address config fan-out,
 * tiling tables, render-backend setup, per-VMID SH_MEM programming and
 * the PA_SC FIFO sizes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	/* max out the GRBM read timeout */
	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* replicate the tiling/address layout to every client that
	 * addresses memory (HDP, DMIF, both SDMA instances, UVD) */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
	/* SDMA only takes the 0x70 bits of gb_addr_config —
	 * NOTE(review): confirm which fields that masks off */
	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
				 adev->gfx.config.max_sh_per_se,
				 adev->gfx.config.max_backends_per_se);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		/* VMID 0 gets uncached mtypes, all other VMIDs non-coherent */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		/* APE1 base > limit — presumably leaves APE1 unused; confirm */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* compute VMIDs (8..15) get their own HSA aperture setup */
	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
2804 
/*
 * Poll until the RLC serdes masters report idle: first the per-CU
 * masters on every SE/SH, then the non-CU masters. Each poll gives up
 * after adev->usec_timeout microseconds without reporting an error.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* per-CU masters: must select each SE/SH individually to read them */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode before dropping the lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* non-CU masters (SE/GC/TC0/TC1) are global, no SE/SH select needed */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
2834 
2835 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2836 					       bool enable)
2837 {
2838 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2839 
2840 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2841 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2842 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2843 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2844 
2845 	WREG32(mmCP_INT_CNTL_RING0, tmp);
2846 }
2847 
/*
 * Halt the RLC micro engine, mask the GUI idle interrupts and wait for
 * outstanding serdes transactions to drain.
 */
void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	/* clearing RLC_ENABLE_F32 stops the RLC F32 core */
	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32(mmRLC_CNTL, tmp);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	gfx_v8_0_wait_for_rlc_serdes(adev);
}
2859 
/* Pulse the GRBM soft-reset bit for the RLC (assert, wait, deassert, wait). */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmGRBM_SOFT_RESET);

	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	/* 50us settle time on each edge — hw requirement, presumably; keep */
	udelay(50);
	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
}
2871 
/* Start the RLC F32 core and (on dGPUs) re-enable the GUI idle interrupts. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
	WREG32(mmRLC_CNTL, tmp);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	/* give the RLC time to come up before callers touch it */
	udelay(50);
}
2885 
/*
 * Upload the RLC firmware to the RLC GPM ucode memory word by word
 * through the ADDR/DATA register pair.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware has been loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* ucode payload starts at the offset recorded in the header */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;	/* dwords */

	/* the ADDR register auto-increments on each DATA write */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	/* finish by writing the fw version to the ADDR register, as done
	 * for the other engines in this file */
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
2909 
/*
 * Full RLC bring-up: stop it, clear clock/power gating state, soft
 * reset, (re)load the firmware when the SMU is not doing it for us,
 * then start it again.
 *
 * Returns 0 on success or a negative error code from firmware loading.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	/* with powerplay enabled, firmware loading is handled elsewhere */
	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the fw; just wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
2942 
2943 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2944 {
2945 	int i;
2946 	u32 tmp = RREG32(mmCP_ME_CNTL);
2947 
2948 	if (enable) {
2949 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2950 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2951 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2952 	} else {
2953 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2954 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2955 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2956 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2957 			adev->gfx.gfx_ring[i].ready = false;
2958 	}
2959 	WREG32(mmCP_ME_CNTL, tmp);
2960 	udelay(50);
2961 }
2962 
/*
 * Upload the PFP, CE and ME firmware images to their respective CP
 * ucode memories.  The CP is halted first; each upload writes the
 * image dword by dword through an ADDR/DATA register pair and finishes
 * by writing the fw version to the ADDR register.
 *
 * Returns 0 on success, -EINVAL if any of the three images is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* halt ME/PFP/CE before touching their ucode memories */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
3019 
3020 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3021 {
3022 	u32 count = 0;
3023 	const struct cs_section_def *sect = NULL;
3024 	const struct cs_extent_def *ext = NULL;
3025 
3026 	/* begin clear state */
3027 	count += 2;
3028 	/* context control state */
3029 	count += 3;
3030 
3031 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3032 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3033 			if (sect->id == SECT_CONTEXT)
3034 				count += 2 + ext->reg_count;
3035 			else
3036 				return 0;
3037 		}
3038 	}
3039 	/* pa_sc_raster_config/pa_sc_raster_config1 */
3040 	count += 4;
3041 	/* end clear state */
3042 	count += 2;
3043 	/* clear state */
3044 	count += 2;
3045 
3046 	return count;
3047 }
3048 
/*
 * Bring up the gfx CP and push the initial clear-state stream (from
 * vi_cs_data) plus per-ASIC raster config and CE partition setup
 * through gfx ring 0.  The packet layout here must match the dword
 * count computed by gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 for the final SET_BASE packet below */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/_1 golden values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		/* unreachable: every gfx v8 ASIC must be listed above */
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
3131 
/*
 * gfx_v8_0_cp_gfx_resume - set up and start gfx ring 0
 *
 * Programs the ring buffer registers (size, read/write pointers, base
 * address, doorbell), emits the clear state via gfx_v8_0_cp_gfx_start()
 * and runs a ring test.
 *
 * Returns 0 on success, or the error from the ring test.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the rptr write settle before dropping RPTR_WR_ENA again */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
3212 
3213 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3214 {
3215 	int i;
3216 
3217 	if (enable) {
3218 		WREG32(mmCP_MEC_CNTL, 0);
3219 	} else {
3220 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3221 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3222 			adev->gfx.compute_ring[i].ready = false;
3223 	}
3224 	udelay(50);
3225 }
3226 
/*
 * gfx_v8_0_cp_compute_load_microcode - upload MEC ucode to the compute CP
 *
 * Halts the compute CP, then streams the MEC1 (and, when present, MEC2)
 * firmware words through the CP_MEC_ME*_UCODE_ADDR/DATA register pair.
 *
 * Returns 0 on success, -EINVAL if no MEC firmware has been requested.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* MEC must be halted while its ucode is replaced */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	/* final ADDR write records the loaded firmware version */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
3272 
/*
 * struct vi_mqd - memory queue descriptor (MQD) consumed by the CP
 * microcode for VI-family compute queues.
 *
 * Each 32-bit member maps to a fixed hardware ordinal (noted per field);
 * the cp_hqd_* / cp_mqd_* fields mirror the identically-named registers
 * programmed in gfx_v8_0_cp_compute_resume().  Do not reorder, insert or
 * resize fields - the layout is defined by the CP firmware.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
3533 
3534 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3535 {
3536 	int i, r;
3537 
3538 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3539 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3540 
3541 		if (ring->mqd_obj) {
3542 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
3543 			if (unlikely(r != 0))
3544 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3545 
3546 			amdgpu_bo_unpin(ring->mqd_obj);
3547 			amdgpu_bo_unreserve(ring->mqd_obj);
3548 
3549 			amdgpu_bo_unref(&ring->mqd_obj);
3550 			ring->mqd_obj = NULL;
3551 		}
3552 	}
3553 }
3554 
/*
 * gfx_v8_0_cp_compute_resume - init HQD/MQD state and start compute rings
 *
 * Programs the per-pipe EOP buffers, then for every compute ring
 * allocates (if needed), pins and maps an MQD buffer object, fills it in
 * while mirroring each field into the matching CP_HQD_*/CP_MQD_*
 * register under the SRBM mutex, activates the queue, and finally runs a
 * ring test on each compute ring.
 *
 * Returns 0 on success or a negative error code if the MQD BO could not
 * be created, pinned or mapped.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 belong to MEC1 (me=1), 4-7 to MEC2 (me=2) */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* NOTE(review): header/misc_reserved magic values are
		 * presumably mandated by the CP firmware - confirm against
		 * the MQD spec before changing */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* EOP base was programmed per-pipe above; read it back */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* poll until the HQD reports inactive */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring tests; a failed ring is marked not ready but is not fatal */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
3809 
3810 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3811 {
3812 	int r;
3813 
3814 	if (!(adev->flags & AMD_IS_APU))
3815 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3816 
3817 	if (!adev->pp_enabled) {
3818 		if (!adev->firmware.smu_load) {
3819 			/* legacy firmware loading */
3820 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
3821 			if (r)
3822 				return r;
3823 
3824 			r = gfx_v8_0_cp_compute_load_microcode(adev);
3825 			if (r)
3826 				return r;
3827 		} else {
3828 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3829 							AMDGPU_UCODE_ID_CP_CE);
3830 			if (r)
3831 				return -EINVAL;
3832 
3833 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3834 							AMDGPU_UCODE_ID_CP_PFP);
3835 			if (r)
3836 				return -EINVAL;
3837 
3838 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3839 							AMDGPU_UCODE_ID_CP_ME);
3840 			if (r)
3841 				return -EINVAL;
3842 
3843 			if (adev->asic_type == CHIP_TOPAZ) {
3844 				r = gfx_v8_0_cp_compute_load_microcode(adev);
3845 				if (r)
3846 					return r;
3847 			} else {
3848 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3849 										 AMDGPU_UCODE_ID_CP_MEC1);
3850 				if (r)
3851 					return -EINVAL;
3852 			}
3853 		}
3854 	}
3855 
3856 	r = gfx_v8_0_cp_gfx_resume(adev);
3857 	if (r)
3858 		return r;
3859 
3860 	r = gfx_v8_0_cp_compute_resume(adev);
3861 	if (r)
3862 		return r;
3863 
3864 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3865 
3866 	return 0;
3867 }
3868 
/* Enable or disable both the gfx and compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
3874 
/*
 * gfx_v8_0_hw_init - IP block hw_init callback
 *
 * Applies the golden register settings, initializes the GPU config,
 * then resumes the RLC and the command processors.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
3894 
/*
 * gfx_v8_0_hw_fini - IP block hw_fini callback
 *
 * Masks the privileged register/instruction fault interrupts, halts
 * both command processors and the RLC, and frees the compute MQD BOs.
 * Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	return 0;
}
3907 
/* Suspend is implemented as a full hw teardown of the gfx block. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}
3914 
/* Resume is implemented as a full hw re-initialization of the gfx block. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}
3921 
3922 static bool gfx_v8_0_is_idle(void *handle)
3923 {
3924 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3925 
3926 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3927 		return false;
3928 	else
3929 		return true;
3930 }
3931 
/*
 * gfx_v8_0_wait_for_idle - poll until the gfx engine goes idle
 *
 * Polls GRBM_STATUS.GUI_ACTIVE once per microsecond, up to
 * adev->usec_timeout iterations.  Returns 0 when the engine is idle,
 * -ETIMEDOUT if it never went idle within the timeout.
 */
static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read GRBM_STATUS and isolate the GUI_ACTIVE bit */
		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
3948 
/*
 * gfx_v8_0_print_status - dump the gfx block's register state
 *
 * Debug helper that logs a snapshot of the GRBM/CP status registers,
 * tiling configuration, per-SE raster config, address configuration,
 * CP ring registers, RLC registers and the per-VMID SH_MEM setup.
 * Used by the soft-reset path to record state before and after a reset.
 */
static void gfx_v8_0_print_status(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* global busy/status registers */
	dev_info(adev->dev, "GFX 8.x registers\n");
	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
		 RREG32(mmGRBM_STATUS));
	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
		 RREG32(mmGRBM_STATUS2));
	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE0));
	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE1));
	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE2));
	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE3));
	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT1));
	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT2));
	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT3));
	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(mmCP_CPF_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPF_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPC_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));

	/* tiling tables: 32 tile modes and 16 macrotile modes */
	for (i = 0; i < 32; i++) {
		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < 16; i++) {
		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
	}
	/* per-shader-engine raster configuration; select each SE in turn */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		dev_info(adev->dev, "  se: %d\n", i);
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG));
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG_1));
	}
	/* restore broadcast selection after the per-SE reads */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmGB_ADDR_CONFIG));
	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmHDP_ADDR_CONFIG));
	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
		 RREG32(mmDMIF_ADDR_CALC));
	dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
	dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));

	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
		 RREG32(mmCP_MEQ_THRESHOLDS));
	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
		 RREG32(mmSX_DEBUG_1));
	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
		 RREG32(mmTA_CNTL_AUX));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL));
	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
		 RREG32(mmSQ_CONFIG));
	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
		 RREG32(mmDB_DEBUG));
	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
		 RREG32(mmDB_DEBUG2));
	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
		 RREG32(mmDB_DEBUG3));
	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
		 RREG32(mmCB_HW_CONTROL));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL_1));
	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
		 RREG32(mmPA_SC_FIFO_SIZE));
	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
		 RREG32(mmVGT_NUM_INSTANCES));
	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
		 RREG32(mmCP_PERFMON_CNTL));
	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
		 RREG32(mmVGT_CACHE_INVALIDATION));
	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
		 RREG32(mmVGT_GS_VERTEX_REUSE));
	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
		 RREG32(mmPA_CL_ENHANCE));
	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
		 RREG32(mmPA_SC_ENHANCE));

	/* command processor / ring buffer registers */
	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
		 RREG32(mmCP_ME_CNTL));
	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
		 RREG32(mmCP_MAX_CONTEXT));
	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
		 RREG32(mmCP_ENDIAN_SWAP));
	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
		 RREG32(mmCP_DEVICE_ID));

	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
		 RREG32(mmCP_SEM_WAIT_TIMER));

	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
		 RREG32(mmCP_RB_WPTR_DELAY));
	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
		 RREG32(mmCP_RB_VMID));
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
		 RREG32(mmCP_RB0_WPTR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
		 RREG32(mmCP_RB0_BASE));
	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
		 RREG32(mmCP_RB0_BASE_HI));
	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
		 RREG32(mmCP_MEC_CNTL));
	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
		 RREG32(mmCP_CPF_DEBUG));

	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
		 RREG32(mmSCRATCH_ADDR));
	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
		 RREG32(mmSCRATCH_UMSK));

	/* interrupt and RLC registers */
	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
		 RREG32(mmCP_INT_CNTL_RING0));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
		 RREG32(mmRLC_CNTL));
	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
		 RREG32(mmRLC_CGCG_CGLS_CTRL));
	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_INIT));
	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_MAX));
	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
		 RREG32(mmRLC_LB_INIT_CU_MASK));
	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
		 RREG32(mmRLC_LB_PARAMS));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
		 RREG32(mmRLC_MC_CNTL));
	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
		 RREG32(mmRLC_UCODE_CNTL));

	/* per-VMID shader memory setup; srbm_mutex serializes SRBM selection */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		dev_info(adev->dev, "  VM %d:\n", i);
		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
			 RREG32(mmSH_MEM_CONFIG));
		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_BASE));
		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_LIMIT));
		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
			 RREG32(mmSH_MEM_BASES));
	}
	/* go back to VMID 0 selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
4137 
/*
 * gfx_v8_0_soft_reset - soft-reset the gfx block if it is hung
 *
 * Inspects GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS to decide which reset
 * bits (CP, GFX, RLC in GRBM_SOFT_RESET; GRBM in SRBM_SOFT_RESET) need
 * to be pulsed.  If any are needed, halts the RLC and both CPs, stalls
 * the memory controller's gfx path via GMCON_DEBUG, pulses the reset
 * bits with the customary 50us holds, then releases the stall.
 * Register state is dumped before and after for debugging.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy pipeline block requires a CP + GFX reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	/* a busy CP additionally needs the GRBM reset via SRBM */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC busy -> reset the RLC too */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS: pending GRBM request -> reset GRBM */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		/* stall/clear the MC's gfx path before pulsing the resets */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		if (grbm_soft_reset) {
			/* assert the reset bits, hold 50us, then deassert;
			 * the read-backs flush each register write */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			/* same assert/hold/deassert sequence on the SRBM side */
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		/* release the MC gfx stall now that the resets are done */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}
4242 
4243 /**
4244  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4245  *
4246  * @adev: amdgpu_device pointer
4247  *
4248  * Fetches a GPU clock counter snapshot.
4249  * Returns the 64 bit clock counter snapshot.
4250  */
4251 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4252 {
4253 	uint64_t clock;
4254 
4255 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4256 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4257 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4258 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4259 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4260 	return clock;
4261 }
4262 
/*
 * gfx_v8_0_ring_emit_gds_switch - emit the GDS register updates for a VMID
 *
 * Emits four WRITE_DATA packets that program the per-VMID GDS base/size,
 * GWS allocation and OA mask registers.  The byte-granular inputs are
 * first converted to hardware units via the AMDGPU_*_SHIFT constants.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* convert byte quantities into hardware allocation units */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base are packed into one register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
4310 
/*
 * gfx_v8_0_early_init - early IP-block setup
 *
 * Sets the fixed ring counts for gfx v8 and installs the gfx v8
 * ring/irq/GDS callback tables before sw/hw init runs.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);

	return 0;
}
4323 
/*
 * gfx_v8_0_late_init - late IP-block setup
 *
 * Takes references on the privileged register/instruction fault
 * interrupts and runs the EDC GPR workaround, which needs IB submission
 * and therefore cannot run before the IB pool exists.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	return 0;
}
4344 
/* Powergating is a no-op for gfx v8: accept the request and do nothing. */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
4350 
/*
 * fiji_send_serdes_cmd - issue a BPM serdes command on Fiji
 *
 * Broadcasts to all SEs/SHs, selects every CU/non-CU master, then
 * programs RLC_SERDES_WR_CTRL with the given command (@cmd) and target
 * register address (@reg_addr), clearing all other command/select
 * fields first.
 */
static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
		uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all shader engines/arrays */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* clear every command/select field before setting the new command */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
			(cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
			(reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
			(0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
4380 
/*
 * fiji_update_medium_grain_clock_gating - toggle MGCG/MGLS on Fiji
 *
 * Enables or disables medium-grain clock gating (MGCG), memory light
 * sleep (MGLS) in the RLC and CP, and the CGTS tree-shade gating.  The
 * numbered steps below follow a fixed hardware sequence; enable and
 * disable apply the steps in opposite order.  Registers are written
 * only when the value actually changes.
 */
static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
		bool enable)
{
	uint32_t temp, data;

	/* It is disabled by HW by default */
	if (enable) {
		/* 1 - RLC memory Light sleep */
		temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
		data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmRLC_MEM_SLP_CNTL, data);

		/* 2 - CP memory Light sleep */
		temp = data = RREG32(mmCP_MEM_SLP_CNTL);
		data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmCP_MEM_SLP_CNTL, data);

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: drop the overrides that keep
		 * MGCG forced off for CPF/RLC/MGCG/GRBM */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
		data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
		data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
		data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
		data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
		data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
		data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}
}
4474 
/*
 * fiji_update_coarse_grain_clock_gating - toggle CGCG/CGLS on Fiji
 *
 * Enables or disables coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) via RLC_CGCG_CGLS_CTRL, the MGCG override register
 * and BPM serdes commands.  The sequence and the serdes idle waits
 * between steps are a fixed hardware requirement.
 */
static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
		bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		/* enable cgls*/
		data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG: force CGCG/CGLS off via the override bits */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}
}
4555 static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4556 		bool enable)
4557 {
4558 	if (enable) {
4559 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4560 		 * ===  MGCG + MGLS + TS(CG/LS) ===
4561 		 */
4562 		fiji_update_medium_grain_clock_gating(adev, enable);
4563 		fiji_update_coarse_grain_clock_gating(adev, enable);
4564 	} else {
4565 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4566 		 * ===  CGCG + CGLS ===
4567 		 */
4568 		fiji_update_coarse_grain_clock_gating(adev, enable);
4569 		fiji_update_medium_grain_clock_gating(adev, enable);
4570 	}
4571 	return 0;
4572 }
4573 
4574 static int gfx_v8_0_set_clockgating_state(void *handle,
4575 					  enum amd_clockgating_state state)
4576 {
4577 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4578 
4579 	switch (adev->asic_type) {
4580 	case CHIP_FIJI:
4581 		fiji_update_gfx_clock_gating(adev,
4582 				state == AMD_CG_STATE_GATE ? true : false);
4583 		break;
4584 	default:
4585 		break;
4586 	}
4587 	return 0;
4588 }
4589 
4590 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4591 {
4592 	u32 rptr;
4593 
4594 	rptr = ring->adev->wb.wb[ring->rptr_offs];
4595 
4596 	return rptr;
4597 }
4598 
4599 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4600 {
4601 	struct amdgpu_device *adev = ring->adev;
4602 	u32 wptr;
4603 
4604 	if (ring->use_doorbell)
4605 		/* XXX check if swapping is necessary on BE */
4606 		wptr = ring->adev->wb.wb[ring->wptr_offs];
4607 	else
4608 		wptr = RREG32(mmCP_RB0_WPTR);
4609 
4610 	return wptr;
4611 }
4612 
/*
 * gfx_v8_0_ring_set_wptr_gfx - publish the gfx ring's new write pointer
 *
 * Doorbell rings: the wptr is stored in the writeback slot and then the
 * doorbell is rung.  Otherwise CP_RB0_WPTR is written directly and read
 * back (presumably to post the write -- TODO confirm).
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
4626 
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on a ring
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the per-client flush bit matches.  The bit
 * used depends on which CP pipe the ring belongs to; compute rings on
 * MEs other than 1 or 2 are silently ignored.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* pick the flush-done bit for this ME/pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
4658 
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 *
 * Writes the predicted next rptr to the ring's next_rptr writeback
 * address, emits a SWITCH_BUFFER on context switch, then emits the
 * INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for CE IBs) packet pointing
 * at the IB.  CE preamble IBs are dropped when the context is unchanged.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	/* account for the 2-dword SWITCH_BUFFER emitted below */
	if (need_ctx_switch)
		next_rptr += 2;

	/* account for the 4-dword IB packet itself */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* CE IBs use the CONST variant of the packet */
	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VMID in bits 24+ when a VM is attached */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4703 
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a
 * compute ring
 *
 * Like the gfx variant but without CE/context-switch handling: writes
 * the predicted next rptr, then emits an INDIRECT_BUFFER packet with
 * the VALID bit set and the VMID (if any) in the control word.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* account for the 4-dword IB packet that follows */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw |
			   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4733 
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 *
 * Emits an EVENT_WRITE_EOP packet that flushes the TC/TCL1 caches and
 * writes @seq (64-bit when AMDGPU_FENCE_FLAG_64BIT is set) to @addr,
 * optionally raising an interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
4753 
/*
 * gfx_v8_0_ring_emit_vm_flush - emit a VM TLB flush on a ring
 *
 * Waits for the ring's last fence value, updates the page table base
 * address register for @vm_id (contexts 0-7 and 8-15 live in different
 * register banks), requests a TLB invalidate for that VMID and polls
 * until the invalidate request register reads back 0.  Gfx rings get
 * extra SWITCH_BUFFER/PFP_SYNC_ME packets to keep CE and PFP in step
 * with ME; compute rings have neither.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait until the fence memory location reaches the sync sequence */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
		 WAIT_REG_MEM_FUNCTION(3))); /* equal */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* update this VMID's page directory base address */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4823 
4824 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4825 {
4826 	return ring->adev->wb.wb[ring->rptr_offs];
4827 }
4828 
4829 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4830 {
4831 	return ring->adev->wb.wb[ring->wptr_offs];
4832 }
4833 
4834 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4835 {
4836 	struct amdgpu_device *adev = ring->adev;
4837 
4838 	/* XXX check if swapping is necessary on BE */
4839 	adev->wb.wb[ring->wptr_offs] = ring->wptr;
4840 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4841 }
4842 
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	/* Emit a fence on a compute (MEC) ring.  Compute uses the
	 * RELEASE_MEM packet rather than EVENT_WRITE_EOP: flush/writeback
	 * the TC caches, write the seq value to addr, and optionally raise
	 * an interrupt, all controlled by flags. */
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL 2 selects a 64-bit write, 1 a 32-bit one; INT_SEL 2
	 * requests an interrupt, 0 suppresses it (per the flag bits). */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
4863 
4864 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4865 						 enum amdgpu_interrupt_state state)
4866 {
4867 	u32 cp_int_cntl;
4868 
4869 	switch (state) {
4870 	case AMDGPU_IRQ_STATE_DISABLE:
4871 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4872 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4873 					    TIME_STAMP_INT_ENABLE, 0);
4874 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4875 		break;
4876 	case AMDGPU_IRQ_STATE_ENABLE:
4877 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4878 		cp_int_cntl =
4879 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4880 				      TIME_STAMP_INT_ENABLE, 1);
4881 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4882 		break;
4883 	default:
4884 		break;
4885 	}
4886 }
4887 
4888 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4889 						     int me, int pipe,
4890 						     enum amdgpu_interrupt_state state)
4891 {
4892 	u32 mec_int_cntl, mec_int_cntl_reg;
4893 
4894 	/*
4895 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
4896 	 * handles the setting of interrupts for this specific pipe. All other
4897 	 * pipes' interrupts are set by amdkfd.
4898 	 */
4899 
4900 	if (me == 1) {
4901 		switch (pipe) {
4902 		case 0:
4903 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4904 			break;
4905 		default:
4906 			DRM_DEBUG("invalid pipe %d\n", pipe);
4907 			return;
4908 		}
4909 	} else {
4910 		DRM_DEBUG("invalid me %d\n", me);
4911 		return;
4912 	}
4913 
4914 	switch (state) {
4915 	case AMDGPU_IRQ_STATE_DISABLE:
4916 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4917 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4918 					     TIME_STAMP_INT_ENABLE, 0);
4919 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4920 		break;
4921 	case AMDGPU_IRQ_STATE_ENABLE:
4922 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4923 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4924 					     TIME_STAMP_INT_ENABLE, 1);
4925 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4926 		break;
4927 	default:
4928 		break;
4929 	}
4930 }
4931 
4932 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4933 					     struct amdgpu_irq_src *source,
4934 					     unsigned type,
4935 					     enum amdgpu_interrupt_state state)
4936 {
4937 	u32 cp_int_cntl;
4938 
4939 	switch (state) {
4940 	case AMDGPU_IRQ_STATE_DISABLE:
4941 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4942 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4943 					    PRIV_REG_INT_ENABLE, 0);
4944 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4945 		break;
4946 	case AMDGPU_IRQ_STATE_ENABLE:
4947 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4948 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4949 					    PRIV_REG_INT_ENABLE, 0);
4950 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4951 		break;
4952 	default:
4953 		break;
4954 	}
4955 
4956 	return 0;
4957 }
4958 
4959 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4960 					      struct amdgpu_irq_src *source,
4961 					      unsigned type,
4962 					      enum amdgpu_interrupt_state state)
4963 {
4964 	u32 cp_int_cntl;
4965 
4966 	switch (state) {
4967 	case AMDGPU_IRQ_STATE_DISABLE:
4968 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4969 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4970 					    PRIV_INSTR_INT_ENABLE, 0);
4971 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4972 		break;
4973 	case AMDGPU_IRQ_STATE_ENABLE:
4974 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4975 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4976 					    PRIV_INSTR_INT_ENABLE, 1);
4977 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4978 		break;
4979 	default:
4980 		break;
4981 	}
4982 
4983 	return 0;
4984 }
4985 
4986 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4987 					    struct amdgpu_irq_src *src,
4988 					    unsigned type,
4989 					    enum amdgpu_interrupt_state state)
4990 {
4991 	switch (type) {
4992 	case AMDGPU_CP_IRQ_GFX_EOP:
4993 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
4994 		break;
4995 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4996 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4997 		break;
4998 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4999 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5000 		break;
5001 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5002 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5003 		break;
5004 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5005 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5006 		break;
5007 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5008 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5009 		break;
5010 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5011 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5012 		break;
5013 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5014 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5015 		break;
5016 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5017 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5018 		break;
5019 	default:
5020 		break;
5021 	}
5022 	return 0;
5023 }
5024 
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	/* EOP interrupt handler: decode which CP ring fired and run fence
	 * processing on it.  Always returns 0 (handled). */
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* IV ring_id encoding: bits [3:2] = ME, [1:0] = pipe,
	 * bits [6:4] = queue. */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* ME 0 is the (single) GFX ring. */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		/* MEC1/MEC2: scan the compute rings for the exact
		 * me/pipe/queue that raised this interrupt. */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
5056 
/* Privileged-register fault handler: an unprivileged command stream
 * touched a privileged register; log it and schedule a GPU reset.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5065 
/* Privileged-instruction fault handler: the CP hit an illegal/privileged
 * instruction in a command stream; log it and schedule a GPU reset.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5074 
/* IP-block callbacks the amdgpu core invokes to drive the GFX v8 block
 * through init/teardown, suspend/resume, idle checks, reset, and
 * clock/power gating state changes. */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.print_status = gfx_v8_0_print_status,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
5091 
/* Ring callbacks for the GFX ring (rptr/wptr access, IB/fence/VM-flush
 * emission, ring/IB tests).  parse_cs is NULL: no CS parsing needed. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
5107 
/* Ring callbacks for compute (MEC) rings; differs from the GFX table in
 * the rptr/wptr accessors (writeback + doorbell), the IB emit, and the
 * fence emit (RELEASE_MEM instead of EVENT_WRITE_EOP). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
5123 
5124 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5125 {
5126 	int i;
5127 
5128 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5129 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5130 
5131 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5132 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5133 }
5134 
/* IRQ source tables: .set programs the hardware enable bits,
 * .process handles a delivered interrupt vector. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
5149 
5150 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5151 {
5152 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5153 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5154 
5155 	adev->gfx.priv_reg_irq.num_types = 1;
5156 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5157 
5158 	adev->gfx.priv_inst_irq.num_types = 1;
5159 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5160 }
5161 
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC GDS info: read total GDS memory size from the
	 * hardware; GWS/OA totals are fixed for this generation. */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	/* Split GDS memory, GWS and OA between the gfx and CS clients;
	 * the partition sizes depend on whether the ASIC reports 64KB
	 * of GDS memory. */
	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
5189 
5190 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
5191 		u32 se, u32 sh)
5192 {
5193 	u32 mask = 0, tmp, tmp1;
5194 	int i;
5195 
5196 	gfx_v8_0_select_se_sh(adev, se, sh);
5197 	tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5198 	tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5199 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5200 
5201 	tmp &= 0xffff0000;
5202 
5203 	tmp |= tmp1;
5204 	tmp >>= 16;
5205 
5206 	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
5207 		mask <<= 1;
5208 		mask |= 1;
5209 	}
5210 
5211 	return (~tmp) & mask;
5212 }
5213 
/* Gather compute-unit topology: per-SE/SH active-CU bitmaps, the total
 * number of active CUs, and the "always on" CU mask (the first two
 * active CUs of each shader array).  Returns 0, or -EINVAL on NULL
 * arguments.  Takes grbm_idx_mutex around the per-SE/SH register reads.
 */
int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
						 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;

	if (!adev || !cu_info)
		return -EINVAL;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first two found in each
			 * SH are recorded as always-on. */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* Pack each SH's AO bits into its 8-bit lane of
			 * the per-SE 16-bit field. */
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	mutex_unlock(&adev->grbm_idx_mutex);
	return 0;
}
5250