xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision bc5aa3a0)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33 
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36 
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39 
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42 
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47 
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50 
51 #include "smu/smu_7_1_3_d.h"
52 
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55 
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60 
61 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70 
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77 
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81 
82 /* BPM Register Address*/
83 enum {
84 	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
85 	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
86 	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
87 	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
88 	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
89 	BPM_REG_FGCG_MAX
90 };
91 
92 #define RLC_FormatDirectRegListLength        14
93 
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100 
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106 
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113 
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119 
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126 
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133 
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140 
/*
 * Per-VMID GDS register offsets, one row per VMID 0-15:
 * {BASE, SIZE, GWS, OA}.  Indexed by VMID when programming GDS
 * partitions for a VM.
 */
141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
142 {
143 	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
144 	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
145 	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
146 	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
147 	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
148 	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
149 	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
150 	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
151 	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
152 	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
153 	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
154 	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
155 	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
156 	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
157 	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
158 	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
159 };
160 
/*
 * Tonga A11 golden register settings, as {reg, mask, value} triples
 * consumed by amdgpu_program_register_sequence().
 * NOTE(review): triple layout inferred from the row grouping; a mask of
 * 0xffffffff presumably replaces the whole register — confirm against
 * amdgpu_program_register_sequence().
 */
161 static const u32 golden_settings_tonga_a11[] =
162 {
163 	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
164 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
165 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
166 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
167 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
168 	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
169 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
170 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
171 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
172 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
173 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
174 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
175 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
176 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
177 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
178 };
179 
/*
 * Tonga common golden registers ({reg, mask, value} triples for
 * amdgpu_program_register_sequence()): raster config, GB_ADDR_CONFIG
 * (matches TONGA_GB_ADDR_CONFIG_GOLDEN) and SPI CU reservations.
 */
180 static const u32 tonga_golden_common_all[] =
181 {
182 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
183 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
184 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
185 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
186 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
187 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
188 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
189 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
190 };
191 
/*
 * Tonga MGCG/CGCG clock-gating init sequence ({reg, mask, value}
 * triples for amdgpu_program_register_sequence()): per-block CGTT
 * clock controls plus per-CU CGTS settings.
 */
192 static const u32 tonga_mgcg_cgcg_init[] =
193 {
194 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
195 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
196 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
197 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
198 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
199 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
200 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
201 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
202 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
203 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
204 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
205 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
206 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
207 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
208 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
209 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
210 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
211 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
212 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
213 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
214 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
215 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
216 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
217 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
218 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
219 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
220 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
221 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
223 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
224 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
225 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
228 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
233 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
238 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
243 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
248 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
253 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255 	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256 	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257 	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
258 	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259 	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260 	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261 	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262 	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
263 	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264 	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
266 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
267 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
268 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
269 };
270 
/*
 * Polaris11 A11 golden register settings ({reg, mask, value} triples
 * for amdgpu_program_register_sequence()).
 */
271 static const u32 golden_settings_polaris11_a11[] =
272 {
273 	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
274 	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
275 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
276 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
277 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
278 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
279 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
280 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
281 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
282 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
283 	mmSQ_CONFIG, 0x07f80000, 0x01180000,
284 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
285 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
286 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
287 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
288 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
289 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
290 };
291 
/*
 * Polaris11 common golden registers ({reg, mask, value} triples):
 * GB_ADDR_CONFIG matches POLARIS11_GB_ADDR_CONFIG_GOLDEN, plus SPI CU
 * reservations.
 */
292 static const u32 polaris11_golden_common_all[] =
293 {
294 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
296 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
297 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
298 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
299 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
300 };
301 
/*
 * Polaris10 A11 golden register settings ({reg, mask, value} triples
 * for amdgpu_program_register_sequence()).  Unlike Polaris11 there is
 * no TCP_CHAN_STEER_LO entry here.
 */
302 static const u32 golden_settings_polaris10_a11[] =
303 {
304 	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
305 	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
306 	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
307 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
308 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
309 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
310 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
311 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
312 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
313 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
314 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
315 	mmSQ_CONFIG, 0x07f80000, 0x07180000,
316 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
317 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
318 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
319 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
320 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
321 };
322 
/*
 * Polaris10 common golden registers ({reg, mask, value} triples):
 * raster config, GB_ADDR_CONFIG and SPI CU reservations.
 */
323 static const u32 polaris10_golden_common_all[] =
324 {
325 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
327 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
328 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
332 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
333 };
334 
/*
 * Fiji common golden registers ({reg, mask, value} triples): raster
 * config, GB_ADDR_CONFIG, SPI CU reservations and SPI_CONFIG_CNTL_1.
 */
335 static const u32 fiji_golden_common_all[] =
336 {
337 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
338 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
339 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
340 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
341 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
342 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
343 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
344 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
345 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
346 	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
347 };
348 
/*
 * Fiji A10 golden register settings ({reg, mask, value} triples for
 * amdgpu_program_register_sequence()).
 */
349 static const u32 golden_settings_fiji_a10[] =
350 {
351 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
352 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
353 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
354 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
355 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
356 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
357 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
358 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
359 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
360 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
361 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
362 };
363 
/*
 * Fiji MGCG/CGCG clock-gating init sequence ({reg, mask, value}
 * triples).  Same per-block CGTT programming as Tonga but without the
 * per-CU CGTS entries.
 */
364 static const u32 fiji_mgcg_cgcg_init[] =
365 {
366 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
367 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
368 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
369 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
370 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
371 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
372 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
373 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
374 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
375 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
376 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
377 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
378 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
379 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
380 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
381 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
382 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
383 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
384 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
385 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
386 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
387 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
388 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
389 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
390 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
391 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
392 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
393 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
394 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
395 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
396 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
397 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
398 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
399 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
400 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
401 };
402 
/*
 * Iceland (Topaz) A11 golden register settings ({reg, mask, value}
 * triples for amdgpu_program_register_sequence()).
 */
403 static const u32 golden_settings_iceland_a11[] =
404 {
405 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
406 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
407 	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
408 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
409 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
410 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
411 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
412 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
413 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
414 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
415 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
416 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
417 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
418 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
419 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
420 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
421 };
422 
/*
 * Iceland (Topaz) common golden registers ({reg, mask, value}
 * triples): GB_ADDR_CONFIG matches TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
423 static const u32 iceland_golden_common_all[] =
424 {
425 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
426 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
427 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
428 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
429 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
430 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
431 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
432 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
433 };
434 
/*
 * Iceland (Topaz) MGCG/CGCG clock-gating init sequence ({reg, mask,
 * value} triples).  Covers CU0-CU5 only, and uses different CP/CPC/CPF
 * and TA_SQC values than the Tonga table.
 */
435 static const u32 iceland_mgcg_cgcg_init[] =
436 {
437 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
438 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
439 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
440 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
441 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
442 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
443 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
444 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
445 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
446 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
447 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
448 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
449 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
450 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
451 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
452 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
453 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
454 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
455 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
456 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
457 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
458 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
459 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
460 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
461 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
462 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
463 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
464 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
465 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
466 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
467 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
468 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
469 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
470 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
471 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
472 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
473 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
474 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
475 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
476 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
477 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
478 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
479 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
480 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
481 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
482 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
483 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
484 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
485 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
486 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
487 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
488 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
489 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
490 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
491 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
492 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
493 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
494 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
495 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
496 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
497 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
498 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
499 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
500 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
501 };
502 
/*
 * Carrizo A11 golden register settings ({reg, mask, value} triples
 * for amdgpu_program_register_sequence()).
 */
503 static const u32 cz_golden_settings_a11[] =
504 {
505 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
506 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
507 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
508 	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
509 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
510 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
511 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
512 	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
513 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
516 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
517 };
518 
/*
 * Carrizo common golden registers ({reg, mask, value} triples):
 * GB_ADDR_CONFIG matches CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
519 static const u32 cz_golden_common_all[] =
520 {
521 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
523 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
525 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
529 };
530 
/*
 * Carrizo MGCG/CGCG clock-gating init sequence ({reg, mask, value}
 * triples): per-block CGTT controls plus CU0-CU7 CGTS settings.
 */
531 static const u32 cz_mgcg_cgcg_init[] =
532 {
533 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
534 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
536 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
537 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
538 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
539 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
540 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
541 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
542 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
543 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
544 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
545 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
546 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
547 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
548 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
549 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
550 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
551 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
552 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
553 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
554 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
555 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
556 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
557 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
558 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
559 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
560 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
561 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
562 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
563 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
564 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
565 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
566 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
567 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
568 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
569 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
570 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
571 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
572 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
573 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
574 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
575 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
576 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
577 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
578 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
579 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
580 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
581 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
582 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
583 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
584 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
585 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
586 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
587 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
588 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
589 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
590 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
591 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
592 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
593 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
594 	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
595 	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
596 	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
597 	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
598 	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
599 	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
600 	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
601 	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
602 	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
603 	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
604 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
605 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
606 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
607 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
608 };
609 
/*
 * Stoney A11 golden register settings ({reg, mask, value} triples
 * for amdgpu_program_register_sequence()).
 */
610 static const u32 stoney_golden_settings_a11[] =
611 {
612 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
613 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
614 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
615 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
616 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
617 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
618 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
619 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
620 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
621 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
622 };
623 
/*
 * Stoney common golden registers ({reg, mask, value} triples):
 * raster config, GB_ADDR_CONFIG and SPI CU reservations.
 */
624 static const u32 stoney_golden_common_all[] =
625 {
626 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
627 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
628 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
629 	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
630 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
631 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
632 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
633 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
634 };
635 
/*
 * Stoney MGCG/CGCG clock-gating init sequence ({reg, mask, value}
 * triples) — much shorter than the other ASICs' tables.
 */
636 static const u32 stoney_mgcg_cgcg_init[] =
637 {
638 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
639 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
640 	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
641 	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
642 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
643 	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
644 };
645 
646 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
647 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
648 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
649 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
650 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
651 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
652 
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" register fixups.
 *
 * For each supported VI ASIC, program (in this order) the MGCG/CGCG
 * clock-gating init table, the per-revision golden settings, then the
 * common golden registers, using amdgpu_program_register_sequence().
 * Polaris10/11 have no separate clock-gating table here.  Unknown ASIC
 * types are silently left untouched.
 */
653 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
654 {
655 	switch (adev->asic_type) {
656 	case CHIP_TOPAZ:
657 		amdgpu_program_register_sequence(adev,
658 						 iceland_mgcg_cgcg_init,
659 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
660 		amdgpu_program_register_sequence(adev,
661 						 golden_settings_iceland_a11,
662 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
663 		amdgpu_program_register_sequence(adev,
664 						 iceland_golden_common_all,
665 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
666 		break;
667 	case CHIP_FIJI:
668 		amdgpu_program_register_sequence(adev,
669 						 fiji_mgcg_cgcg_init,
670 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
671 		amdgpu_program_register_sequence(adev,
672 						 golden_settings_fiji_a10,
673 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
674 		amdgpu_program_register_sequence(adev,
675 						 fiji_golden_common_all,
676 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
677 		break;
678 
679 	case CHIP_TONGA:
680 		amdgpu_program_register_sequence(adev,
681 						 tonga_mgcg_cgcg_init,
682 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
683 		amdgpu_program_register_sequence(adev,
684 						 golden_settings_tonga_a11,
685 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
686 		amdgpu_program_register_sequence(adev,
687 						 tonga_golden_common_all,
688 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
689 		break;
690 	case CHIP_POLARIS11:
691 		amdgpu_program_register_sequence(adev,
692 						 golden_settings_polaris11_a11,
693 						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
694 		amdgpu_program_register_sequence(adev,
695 						 polaris11_golden_common_all,
696 						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
697 		break;
698 	case CHIP_POLARIS10:
699 		amdgpu_program_register_sequence(adev,
700 						 golden_settings_polaris10_a11,
701 						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
702 		amdgpu_program_register_sequence(adev,
703 						 polaris10_golden_common_all,
704 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		/*
		 * SMC-side ACLK setup, then two extra I2C transactions for
		 * PCI revision 0xc7 boards.  NOTE(review): presumably a
		 * board-specific hardware fixup — magic values are
		 * undocumented here; confirm against AMD hardware docs.
		 */
705 		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
706 		if (adev->pdev->revision == 0xc7) {
707 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
708 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
709 		}
710 		break;
711 	case CHIP_CARRIZO:
712 		amdgpu_program_register_sequence(adev,
713 						 cz_mgcg_cgcg_init,
714 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
715 		amdgpu_program_register_sequence(adev,
716 						 cz_golden_settings_a11,
717 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
718 		amdgpu_program_register_sequence(adev,
719 						 cz_golden_common_all,
720 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
721 		break;
722 	case CHIP_STONEY:
723 		amdgpu_program_register_sequence(adev,
724 						 stoney_mgcg_cgcg_init,
725 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
726 		amdgpu_program_register_sequence(adev,
727 						 stoney_golden_settings_a11,
728 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
729 		amdgpu_program_register_sequence(adev,
730 						 stoney_golden_common_all,
731 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
732 		break;
733 	default:
734 		break;
735 	}
736 }
737 
738 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
739 {
740 	int i;
741 
742 	adev->gfx.scratch.num_reg = 7;
743 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
744 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
745 		adev->gfx.scratch.free[i] = true;
746 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
747 	}
748 }
749 
/*
 * gfx_v8_0_ring_test_ring - basic sanity test of a CP ring
 *
 * Seeds a scratch register with 0xCAFEDEAD, emits a SET_UCONFIG_REG
 * packet on @ring that writes 0xDEADBEEF to the same register, and
 * polls (up to adev->usec_timeout microseconds) for the new value.
 *
 * Returns 0 if the CP executed the packet, -EINVAL on timeout, or the
 * error from scratch-register / ring allocation.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* grab a free scratch register for the test */
	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed a value the packet below must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	/* reserve 3 dwords on the ring for the test packet */
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* SET_UCONFIG_REG: one register write, scratch <- 0xDEADBEEF */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP to process the packet */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
793 
/*
 * gfx_v8_0_ring_test_ib - sanity test of indirect buffer execution
 *
 * Like gfx_v8_0_ring_test_ring(), but the SET_UCONFIG_REG write of
 * 0xDEADBEEF is placed in a small IB which is scheduled on @ring; the
 * scratch register is then checked after waiting (bounded by @timeout
 * jiffies) for the IB's fence.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals, -EINVAL
 * if the scratch value is wrong, or a negative error from the
 * scratch/IB/fence helpers.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* seed the scratch register; the IB must overwrite this */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* 3-dword IB: SET_UCONFIG_REG scratch <- 0xDEADBEEF */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	/* wait for the IB's fence; 0 means timeout, <0 means error */
	r = fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
849 
850 
851 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
852 	release_firmware(adev->gfx.pfp_fw);
853 	adev->gfx.pfp_fw = NULL;
854 	release_firmware(adev->gfx.me_fw);
855 	adev->gfx.me_fw = NULL;
856 	release_firmware(adev->gfx.ce_fw);
857 	adev->gfx.ce_fw = NULL;
858 	release_firmware(adev->gfx.rlc_fw);
859 	adev->gfx.rlc_fw = NULL;
860 	release_firmware(adev->gfx.mec_fw);
861 	adev->gfx.mec_fw = NULL;
862 	if ((adev->asic_type != CHIP_STONEY) &&
863 	    (adev->asic_type != CHIP_TOPAZ))
864 		release_firmware(adev->gfx.mec2_fw);
865 	adev->gfx.mec2_fw = NULL;
866 
867 	kfree(adev->gfx.rlc.register_list_format);
868 }
869 
870 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
871 {
872 	const char *chip_name;
873 	char fw_name[30];
874 	int err;
875 	struct amdgpu_firmware_info *info = NULL;
876 	const struct common_firmware_header *header = NULL;
877 	const struct gfx_firmware_header_v1_0 *cp_hdr;
878 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
879 	unsigned int *tmp = NULL, i;
880 
881 	DRM_DEBUG("\n");
882 
883 	switch (adev->asic_type) {
884 	case CHIP_TOPAZ:
885 		chip_name = "topaz";
886 		break;
887 	case CHIP_TONGA:
888 		chip_name = "tonga";
889 		break;
890 	case CHIP_CARRIZO:
891 		chip_name = "carrizo";
892 		break;
893 	case CHIP_FIJI:
894 		chip_name = "fiji";
895 		break;
896 	case CHIP_POLARIS11:
897 		chip_name = "polaris11";
898 		break;
899 	case CHIP_POLARIS10:
900 		chip_name = "polaris10";
901 		break;
902 	case CHIP_STONEY:
903 		chip_name = "stoney";
904 		break;
905 	default:
906 		BUG();
907 	}
908 
909 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
910 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
911 	if (err)
912 		goto out;
913 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
914 	if (err)
915 		goto out;
916 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
917 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
918 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
919 
920 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
921 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
922 	if (err)
923 		goto out;
924 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
925 	if (err)
926 		goto out;
927 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
928 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
929 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
930 
931 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
932 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
933 	if (err)
934 		goto out;
935 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
936 	if (err)
937 		goto out;
938 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
939 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
940 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
941 
942 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
943 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
944 	if (err)
945 		goto out;
946 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
947 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
948 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
949 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
950 
951 	adev->gfx.rlc.save_and_restore_offset =
952 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
953 	adev->gfx.rlc.clear_state_descriptor_offset =
954 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
955 	adev->gfx.rlc.avail_scratch_ram_locations =
956 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
957 	adev->gfx.rlc.reg_restore_list_size =
958 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
959 	adev->gfx.rlc.reg_list_format_start =
960 			le32_to_cpu(rlc_hdr->reg_list_format_start);
961 	adev->gfx.rlc.reg_list_format_separate_start =
962 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
963 	adev->gfx.rlc.starting_offsets_start =
964 			le32_to_cpu(rlc_hdr->starting_offsets_start);
965 	adev->gfx.rlc.reg_list_format_size_bytes =
966 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
967 	adev->gfx.rlc.reg_list_size_bytes =
968 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
969 
970 	adev->gfx.rlc.register_list_format =
971 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
972 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
973 
974 	if (!adev->gfx.rlc.register_list_format) {
975 		err = -ENOMEM;
976 		goto out;
977 	}
978 
979 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
980 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
981 	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
982 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
983 
984 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
985 
986 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
987 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
988 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
989 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
990 
991 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
992 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
993 	if (err)
994 		goto out;
995 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
996 	if (err)
997 		goto out;
998 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
999 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1000 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1001 
1002 	if ((adev->asic_type != CHIP_STONEY) &&
1003 	    (adev->asic_type != CHIP_TOPAZ)) {
1004 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1005 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1006 		if (!err) {
1007 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1008 			if (err)
1009 				goto out;
1010 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1011 				adev->gfx.mec2_fw->data;
1012 			adev->gfx.mec2_fw_version =
1013 				le32_to_cpu(cp_hdr->header.ucode_version);
1014 			adev->gfx.mec2_feature_version =
1015 				le32_to_cpu(cp_hdr->ucode_feature_version);
1016 		} else {
1017 			err = 0;
1018 			adev->gfx.mec2_fw = NULL;
1019 		}
1020 	}
1021 
1022 	if (adev->firmware.smu_load) {
1023 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1024 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1025 		info->fw = adev->gfx.pfp_fw;
1026 		header = (const struct common_firmware_header *)info->fw->data;
1027 		adev->firmware.fw_size +=
1028 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1029 
1030 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1031 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1032 		info->fw = adev->gfx.me_fw;
1033 		header = (const struct common_firmware_header *)info->fw->data;
1034 		adev->firmware.fw_size +=
1035 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1036 
1037 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1038 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1039 		info->fw = adev->gfx.ce_fw;
1040 		header = (const struct common_firmware_header *)info->fw->data;
1041 		adev->firmware.fw_size +=
1042 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1043 
1044 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1045 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1046 		info->fw = adev->gfx.rlc_fw;
1047 		header = (const struct common_firmware_header *)info->fw->data;
1048 		adev->firmware.fw_size +=
1049 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050 
1051 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1052 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1053 		info->fw = adev->gfx.mec_fw;
1054 		header = (const struct common_firmware_header *)info->fw->data;
1055 		adev->firmware.fw_size +=
1056 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1057 
1058 		if (adev->gfx.mec2_fw) {
1059 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1060 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1061 			info->fw = adev->gfx.mec2_fw;
1062 			header = (const struct common_firmware_header *)info->fw->data;
1063 			adev->firmware.fw_size +=
1064 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065 		}
1066 
1067 	}
1068 
1069 out:
1070 	if (err) {
1071 		dev_err(adev->dev,
1072 			"gfx8: Failed to load firmware \"%s\"\n",
1073 			fw_name);
1074 		release_firmware(adev->gfx.pfp_fw);
1075 		adev->gfx.pfp_fw = NULL;
1076 		release_firmware(adev->gfx.me_fw);
1077 		adev->gfx.me_fw = NULL;
1078 		release_firmware(adev->gfx.ce_fw);
1079 		adev->gfx.ce_fw = NULL;
1080 		release_firmware(adev->gfx.rlc_fw);
1081 		adev->gfx.rlc_fw = NULL;
1082 		release_firmware(adev->gfx.mec_fw);
1083 		adev->gfx.mec_fw = NULL;
1084 		release_firmware(adev->gfx.mec2_fw);
1085 		adev->gfx.mec2_fw = NULL;
1086 	}
1087 	return err;
1088 }
1089 
1090 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1091 				    volatile u32 *buffer)
1092 {
1093 	u32 count = 0, i;
1094 	const struct cs_section_def *sect = NULL;
1095 	const struct cs_extent_def *ext = NULL;
1096 
1097 	if (adev->gfx.rlc.cs_data == NULL)
1098 		return;
1099 	if (buffer == NULL)
1100 		return;
1101 
1102 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1103 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1104 
1105 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1106 	buffer[count++] = cpu_to_le32(0x80000000);
1107 	buffer[count++] = cpu_to_le32(0x80000000);
1108 
1109 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1110 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1111 			if (sect->id == SECT_CONTEXT) {
1112 				buffer[count++] =
1113 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1114 				buffer[count++] = cpu_to_le32(ext->reg_index -
1115 						PACKET3_SET_CONTEXT_REG_START);
1116 				for (i = 0; i < ext->reg_count; i++)
1117 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1118 			} else {
1119 				return;
1120 			}
1121 		}
1122 	}
1123 
1124 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1125 	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1126 			PACKET3_SET_CONTEXT_REG_START);
1127 	switch (adev->asic_type) {
1128 	case CHIP_TONGA:
1129 	case CHIP_POLARIS10:
1130 		buffer[count++] = cpu_to_le32(0x16000012);
1131 		buffer[count++] = cpu_to_le32(0x0000002A);
1132 		break;
1133 	case CHIP_POLARIS11:
1134 		buffer[count++] = cpu_to_le32(0x16000012);
1135 		buffer[count++] = cpu_to_le32(0x00000000);
1136 		break;
1137 	case CHIP_FIJI:
1138 		buffer[count++] = cpu_to_le32(0x3a00161a);
1139 		buffer[count++] = cpu_to_le32(0x0000002e);
1140 		break;
1141 	case CHIP_TOPAZ:
1142 	case CHIP_CARRIZO:
1143 		buffer[count++] = cpu_to_le32(0x00000002);
1144 		buffer[count++] = cpu_to_le32(0x00000000);
1145 		break;
1146 	case CHIP_STONEY:
1147 		buffer[count++] = cpu_to_le32(0x00000000);
1148 		buffer[count++] = cpu_to_le32(0x00000000);
1149 		break;
1150 	default:
1151 		buffer[count++] = cpu_to_le32(0x00000000);
1152 		buffer[count++] = cpu_to_le32(0x00000000);
1153 		break;
1154 	}
1155 
1156 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1157 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1158 
1159 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1160 	buffer[count++] = cpu_to_le32(0);
1161 }
1162 
1163 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1164 {
1165 	const __le32 *fw_data;
1166 	volatile u32 *dst_ptr;
1167 	int me, i, max_me = 4;
1168 	u32 bo_offset = 0;
1169 	u32 table_offset, table_size;
1170 
1171 	if (adev->asic_type == CHIP_CARRIZO)
1172 		max_me = 5;
1173 
1174 	/* write the cp table buffer */
1175 	dst_ptr = adev->gfx.rlc.cp_table_ptr;
1176 	for (me = 0; me < max_me; me++) {
1177 		if (me == 0) {
1178 			const struct gfx_firmware_header_v1_0 *hdr =
1179 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1180 			fw_data = (const __le32 *)
1181 				(adev->gfx.ce_fw->data +
1182 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1183 			table_offset = le32_to_cpu(hdr->jt_offset);
1184 			table_size = le32_to_cpu(hdr->jt_size);
1185 		} else if (me == 1) {
1186 			const struct gfx_firmware_header_v1_0 *hdr =
1187 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1188 			fw_data = (const __le32 *)
1189 				(adev->gfx.pfp_fw->data +
1190 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1191 			table_offset = le32_to_cpu(hdr->jt_offset);
1192 			table_size = le32_to_cpu(hdr->jt_size);
1193 		} else if (me == 2) {
1194 			const struct gfx_firmware_header_v1_0 *hdr =
1195 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1196 			fw_data = (const __le32 *)
1197 				(adev->gfx.me_fw->data +
1198 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1199 			table_offset = le32_to_cpu(hdr->jt_offset);
1200 			table_size = le32_to_cpu(hdr->jt_size);
1201 		} else if (me == 3) {
1202 			const struct gfx_firmware_header_v1_0 *hdr =
1203 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1204 			fw_data = (const __le32 *)
1205 				(adev->gfx.mec_fw->data +
1206 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1207 			table_offset = le32_to_cpu(hdr->jt_offset);
1208 			table_size = le32_to_cpu(hdr->jt_size);
1209 		} else  if (me == 4) {
1210 			const struct gfx_firmware_header_v1_0 *hdr =
1211 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1212 			fw_data = (const __le32 *)
1213 				(adev->gfx.mec2_fw->data +
1214 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1215 			table_offset = le32_to_cpu(hdr->jt_offset);
1216 			table_size = le32_to_cpu(hdr->jt_size);
1217 		}
1218 
1219 		for (i = 0; i < table_size; i ++) {
1220 			dst_ptr[bo_offset + i] =
1221 				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1222 		}
1223 
1224 		bo_offset += table_size;
1225 	}
1226 }
1227 
1228 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1229 {
1230 	int r;
1231 
1232 	/* clear state block */
1233 	if (adev->gfx.rlc.clear_state_obj) {
1234 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1235 		if (unlikely(r != 0))
1236 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1237 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1238 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1239 
1240 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1241 		adev->gfx.rlc.clear_state_obj = NULL;
1242 	}
1243 
1244 	/* jump table block */
1245 	if (adev->gfx.rlc.cp_table_obj) {
1246 		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1247 		if (unlikely(r != 0))
1248 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1249 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1250 		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1251 
1252 		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1253 		adev->gfx.rlc.cp_table_obj = NULL;
1254 	}
1255 }
1256 
/*
 * gfx_v8_0_rlc_init - allocate and fill the RLC buffer objects
 *
 * Creates, pins and maps a VRAM BO for the clear state buffer and fills
 * it via gfx_v8_0_get_csb_buffer().  On Carrizo and Stoney a second BO
 * is created for the CP jump tables plus GDS backup and filled via
 * cz_init_cp_jump_table().
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* NOTE(review): on kmap failure the BO is still reserved when
		 * gfx_v8_0_rlc_fini() tries to reserve it again -- confirm
		 * whether an unreserve is missing here. */
		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		/* jump tables for up to 5 engines plus 64K GDS backup */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
			return r;
		}
		/* NOTE(review): kmap failure returns with the BO still
		 * reserved -- verify callers can cope or add an unreserve. */
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);

	}

	return 0;
}
1353 
1354 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1355 {
1356 	int r;
1357 
1358 	if (adev->gfx.mec.hpd_eop_obj) {
1359 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1360 		if (unlikely(r != 0))
1361 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1362 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1363 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1364 
1365 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1366 		adev->gfx.mec.hpd_eop_obj = NULL;
1367 	}
1368 }
1369 
1370 #define MEC_HPD_SIZE 2048
1371 
/*
 * gfx_v8_0_mec_init - allocate and zero the MEC HPD EOP buffer
 *
 * Sets up the compute queue bookkeeping (1 MEC, 1 pipe, 8 queues per
 * pipe) and creates, pins, maps and zero-fills a GTT BO holding the
 * hardware packet descriptor (HPD) end-of-pipe areas.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	/* NOTE(review): on pin/kmap failure below, mec_fini() re-reserves a
	 * BO this path still holds reserved -- confirm this cannot deadlock
	 * or whether an unreserve belongs before the fini calls. */
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	/* start with all HPD areas zeroed */
	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1423 
/*
 * Raw GFX8 shader machine code, dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the VGPRs as part of
 * the Carrizo EDC workaround.  Presumably each dword pair encodes
 * v_mov-style VGPR writes with the final words being wait/end-program
 * -- TODO confirm against the GFX8 ISA reference.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1460 
/*
 * Raw GFX8 shader machine code, dispatched (twice, with different
 * STATIC_THREAD_MGMT masks) by gfx_v8_0_do_edc_gpr_workarounds() to
 * initialize the SGPRs for the Carrizo EDC workaround.  Presumably
 * s_mov-style SGPR writes ending in wait/end-program -- TODO confirm
 * against the GFX8 ISA reference.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1485 
/*
 * Register/value pairs (consumed two at a time via SET_SH_REG packets
 * in gfx_v8_0_do_edc_gpr_workarounds()) describing the compute dispatch
 * state for the VGPR init shader: all SEs enabled, 256*4 threads in X.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1505 
/*
 * Register/value pairs for the first SGPR init dispatch: thread
 * management mask 0x0f (lower SE/SH selection, differing from
 * sgpr2_init_regs' 0xf0), 256*5 threads in X.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1525 
/*
 * Register/value pairs for the second SGPR init dispatch: identical to
 * sgpr1_init_regs except the thread management mask is 0xf0, covering
 * the complementary selection.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1545 
/*
 * EDC SEC/DED error counter registers across the GFX blocks (CP, GDS,
 * SPI, SQ, TC*, TD); presumably read/cleared as part of the EDC
 * workaround sequence -- usage is outside this view, verify against
 * gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1574 
1575 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1576 {
1577 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1578 	struct amdgpu_ib ib;
1579 	struct fence *f = NULL;
1580 	int r, i;
1581 	u32 tmp;
1582 	unsigned total_size, vgpr_offset, sgpr_offset;
1583 	u64 gpu_addr;
1584 
1585 	/* only supported on CZ */
1586 	if (adev->asic_type != CHIP_CARRIZO)
1587 		return 0;
1588 
1589 	/* bail if the compute ring is not ready */
1590 	if (!ring->ready)
1591 		return 0;
1592 
1593 	tmp = RREG32(mmGB_EDC_MODE);
1594 	WREG32(mmGB_EDC_MODE, 0);
1595 
1596 	total_size =
1597 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1598 	total_size +=
1599 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1600 	total_size +=
1601 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1602 	total_size = ALIGN(total_size, 256);
1603 	vgpr_offset = total_size;
1604 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1605 	sgpr_offset = total_size;
1606 	total_size += sizeof(sgpr_init_compute_shader);
1607 
1608 	/* allocate an indirect buffer to put the commands in */
1609 	memset(&ib, 0, sizeof(ib));
1610 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1611 	if (r) {
1612 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1613 		return r;
1614 	}
1615 
1616 	/* load the compute shaders */
1617 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1618 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1619 
1620 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1621 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1622 
1623 	/* init the ib length to 0 */
1624 	ib.length_dw = 0;
1625 
1626 	/* VGPR */
1627 	/* write the register state for the compute dispatch */
1628 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1629 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1630 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1631 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1632 	}
1633 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1634 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1635 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1636 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1637 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1638 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1639 
1640 	/* write dispatch packet */
1641 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1642 	ib.ptr[ib.length_dw++] = 8; /* x */
1643 	ib.ptr[ib.length_dw++] = 1; /* y */
1644 	ib.ptr[ib.length_dw++] = 1; /* z */
1645 	ib.ptr[ib.length_dw++] =
1646 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1647 
1648 	/* write CS partial flush packet */
1649 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1650 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1651 
1652 	/* SGPR1 */
1653 	/* write the register state for the compute dispatch */
1654 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1655 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1656 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1657 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1658 	}
1659 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1660 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1661 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1662 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1663 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1664 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1665 
1666 	/* write dispatch packet */
1667 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1668 	ib.ptr[ib.length_dw++] = 8; /* x */
1669 	ib.ptr[ib.length_dw++] = 1; /* y */
1670 	ib.ptr[ib.length_dw++] = 1; /* z */
1671 	ib.ptr[ib.length_dw++] =
1672 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1673 
1674 	/* write CS partial flush packet */
1675 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1676 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1677 
1678 	/* SGPR2 */
1679 	/* write the register state for the compute dispatch */
1680 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1681 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1682 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1683 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1684 	}
1685 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1686 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1687 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1688 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1689 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1690 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1691 
1692 	/* write dispatch packet */
1693 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1694 	ib.ptr[ib.length_dw++] = 8; /* x */
1695 	ib.ptr[ib.length_dw++] = 1; /* y */
1696 	ib.ptr[ib.length_dw++] = 1; /* z */
1697 	ib.ptr[ib.length_dw++] =
1698 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1699 
1700 	/* write CS partial flush packet */
1701 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1702 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1703 
1704 	/* shedule the ib on the ring */
1705 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1706 	if (r) {
1707 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1708 		goto fail;
1709 	}
1710 
1711 	/* wait for the GPU to finish processing the IB */
1712 	r = fence_wait(f, false);
1713 	if (r) {
1714 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1715 		goto fail;
1716 	}
1717 
1718 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1719 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1720 	WREG32(mmGB_EDC_MODE, tmp);
1721 
1722 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1723 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1724 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1725 
1726 
1727 	/* read back registers to clear the counters */
1728 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1729 		RREG32(sec_ded_counter_registers[i]);
1730 
1731 fail:
1732 	amdgpu_ib_free(adev, &ib, NULL);
1733 	fence_put(f);
1734 
1735 	return r;
1736 }
1737 
/*
 * gfx_v8_0_gpu_early_init - record the static GFX topology and address config
 * @adev: amdgpu device pointer
 *
 * Fills in adev->gfx.config with per-ASIC limits (shader engines, tile pipes,
 * CUs per SH, FIFO sizes, ...) chosen by asic_type (and, for the APUs, the
 * PCI revision id), then derives the memory row size from the MC registers
 * and folds it into the GB_ADDR_CONFIG value cached in
 * adev->gfx.config.gb_addr_config.
 *
 * Returns 0 on success, or the error from amdgpu_atombios_get_gfx_info()
 * on the Polaris parts that read their config from the vbios.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	/* per-ASIC shader engine / CU / cache topology and FIFO sizing */
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
		/* Polaris topology (SE/pipe/CU counts) comes from the vbios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		/* Polaris topology (SE/pipe/CU counts) comes from the vbios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the Carrizo SKU (PCI revision id) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the Stoney SKU (PCI revision id) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never consumed below —
	 * presumably kept for the register-read side effect or future use;
	 * confirm before removing. */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive row size from the column count fuses, capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size: encode mem_row_size_in_kb into GB_ADDR_CONFIG.ROW_SIZE */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1994 
1995 static int gfx_v8_0_sw_init(void *handle)
1996 {
1997 	int i, r;
1998 	struct amdgpu_ring *ring;
1999 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2000 
2001 	/* EOP Event */
2002 	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2003 	if (r)
2004 		return r;
2005 
2006 	/* Privileged reg */
2007 	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2008 	if (r)
2009 		return r;
2010 
2011 	/* Privileged inst */
2012 	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2013 	if (r)
2014 		return r;
2015 
2016 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2017 
2018 	gfx_v8_0_scratch_init(adev);
2019 
2020 	r = gfx_v8_0_init_microcode(adev);
2021 	if (r) {
2022 		DRM_ERROR("Failed to load gfx firmware!\n");
2023 		return r;
2024 	}
2025 
2026 	r = gfx_v8_0_rlc_init(adev);
2027 	if (r) {
2028 		DRM_ERROR("Failed to init rlc BOs!\n");
2029 		return r;
2030 	}
2031 
2032 	r = gfx_v8_0_mec_init(adev);
2033 	if (r) {
2034 		DRM_ERROR("Failed to init MEC BOs!\n");
2035 		return r;
2036 	}
2037 
2038 	/* set up the gfx ring */
2039 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2040 		ring = &adev->gfx.gfx_ring[i];
2041 		ring->ring_obj = NULL;
2042 		sprintf(ring->name, "gfx");
2043 		/* no gfx doorbells on iceland */
2044 		if (adev->asic_type != CHIP_TOPAZ) {
2045 			ring->use_doorbell = true;
2046 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2047 		}
2048 
2049 		r = amdgpu_ring_init(adev, ring, 1024,
2050 				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2051 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2052 				     AMDGPU_RING_TYPE_GFX);
2053 		if (r)
2054 			return r;
2055 	}
2056 
2057 	/* set up the compute queues */
2058 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2059 		unsigned irq_type;
2060 
2061 		/* max 32 queues per MEC */
2062 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2063 			DRM_ERROR("Too many (%d) compute rings!\n", i);
2064 			break;
2065 		}
2066 		ring = &adev->gfx.compute_ring[i];
2067 		ring->ring_obj = NULL;
2068 		ring->use_doorbell = true;
2069 		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2070 		ring->me = 1; /* first MEC */
2071 		ring->pipe = i / 8;
2072 		ring->queue = i % 8;
2073 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2074 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2075 		/* type-2 packets are deprecated on MEC, use type-3 instead */
2076 		r = amdgpu_ring_init(adev, ring, 1024,
2077 				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2078 				     &adev->gfx.eop_irq, irq_type,
2079 				     AMDGPU_RING_TYPE_COMPUTE);
2080 		if (r)
2081 			return r;
2082 	}
2083 
2084 	/* reserve GDS, GWS and OA resource for gfx */
2085 	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
2086 			PAGE_SIZE, true,
2087 			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
2088 			NULL, &adev->gds.gds_gfx_bo);
2089 	if (r)
2090 		return r;
2091 
2092 	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
2093 		PAGE_SIZE, true,
2094 		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
2095 		NULL, &adev->gds.gws_gfx_bo);
2096 	if (r)
2097 		return r;
2098 
2099 	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
2100 			PAGE_SIZE, true,
2101 			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
2102 			NULL, &adev->gds.oa_gfx_bo);
2103 	if (r)
2104 		return r;
2105 
2106 	adev->gfx.ce_ram_size = 0x8000;
2107 
2108 	r = gfx_v8_0_gpu_early_init(adev);
2109 	if (r)
2110 		return r;
2111 
2112 	return 0;
2113 }
2114 
2115 static int gfx_v8_0_sw_fini(void *handle)
2116 {
2117 	int i;
2118 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2119 
2120 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2121 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2122 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2123 
2124 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2125 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2126 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2127 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2128 
2129 	gfx_v8_0_mec_fini(adev);
2130 
2131 	gfx_v8_0_rlc_fini(adev);
2132 
2133 	gfx_v8_0_free_microcode(adev);
2134 
2135 	return 0;
2136 }
2137 
2138 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2139 {
2140 	uint32_t *modearray, *mod2array;
2141 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2142 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2143 	u32 reg_offset;
2144 
2145 	modearray = adev->gfx.config.tile_mode_array;
2146 	mod2array = adev->gfx.config.macrotile_mode_array;
2147 
2148 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2149 		modearray[reg_offset] = 0;
2150 
2151 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2152 		mod2array[reg_offset] = 0;
2153 
2154 	switch (adev->asic_type) {
2155 	case CHIP_TOPAZ:
2156 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157 				PIPE_CONFIG(ADDR_SURF_P2) |
2158 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2159 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161 				PIPE_CONFIG(ADDR_SURF_P2) |
2162 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2163 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165 				PIPE_CONFIG(ADDR_SURF_P2) |
2166 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2167 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169 				PIPE_CONFIG(ADDR_SURF_P2) |
2170 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2171 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2173 				PIPE_CONFIG(ADDR_SURF_P2) |
2174 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2175 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2177 				PIPE_CONFIG(ADDR_SURF_P2) |
2178 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2179 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2180 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2181 				PIPE_CONFIG(ADDR_SURF_P2) |
2182 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2183 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2184 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2185 				PIPE_CONFIG(ADDR_SURF_P2));
2186 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2187 				PIPE_CONFIG(ADDR_SURF_P2) |
2188 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2189 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191 				 PIPE_CONFIG(ADDR_SURF_P2) |
2192 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2193 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2195 				 PIPE_CONFIG(ADDR_SURF_P2) |
2196 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2197 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2198 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2199 				 PIPE_CONFIG(ADDR_SURF_P2) |
2200 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2203 				 PIPE_CONFIG(ADDR_SURF_P2) |
2204 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2207 				 PIPE_CONFIG(ADDR_SURF_P2) |
2208 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2209 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2210 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2211 				 PIPE_CONFIG(ADDR_SURF_P2) |
2212 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2213 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2214 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2215 				 PIPE_CONFIG(ADDR_SURF_P2) |
2216 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2217 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2219 				 PIPE_CONFIG(ADDR_SURF_P2) |
2220 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2223 				 PIPE_CONFIG(ADDR_SURF_P2) |
2224 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2227 				 PIPE_CONFIG(ADDR_SURF_P2) |
2228 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2229 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2231 				 PIPE_CONFIG(ADDR_SURF_P2) |
2232 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2233 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2235 				 PIPE_CONFIG(ADDR_SURF_P2) |
2236 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2237 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2239 				 PIPE_CONFIG(ADDR_SURF_P2) |
2240 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2241 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2242 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2243 				 PIPE_CONFIG(ADDR_SURF_P2) |
2244 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2245 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2246 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2247 				 PIPE_CONFIG(ADDR_SURF_P2) |
2248 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2249 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2250 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2251 				 PIPE_CONFIG(ADDR_SURF_P2) |
2252 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2253 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2254 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2255 				 PIPE_CONFIG(ADDR_SURF_P2) |
2256 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2257 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2258 
2259 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2260 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262 				NUM_BANKS(ADDR_SURF_8_BANK));
2263 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2264 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266 				NUM_BANKS(ADDR_SURF_8_BANK));
2267 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2268 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2269 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270 				NUM_BANKS(ADDR_SURF_8_BANK));
2271 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2273 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274 				NUM_BANKS(ADDR_SURF_8_BANK));
2275 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2277 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278 				NUM_BANKS(ADDR_SURF_8_BANK));
2279 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2282 				NUM_BANKS(ADDR_SURF_8_BANK));
2283 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2284 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2285 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2286 				NUM_BANKS(ADDR_SURF_8_BANK));
2287 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2288 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2289 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290 				NUM_BANKS(ADDR_SURF_16_BANK));
2291 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2292 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 				NUM_BANKS(ADDR_SURF_16_BANK));
2295 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2296 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2297 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298 				 NUM_BANKS(ADDR_SURF_16_BANK));
2299 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2300 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2301 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302 				 NUM_BANKS(ADDR_SURF_16_BANK));
2303 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2305 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306 				 NUM_BANKS(ADDR_SURF_16_BANK));
2307 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2309 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2310 				 NUM_BANKS(ADDR_SURF_16_BANK));
2311 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2313 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2314 				 NUM_BANKS(ADDR_SURF_8_BANK));
2315 
2316 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2317 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2318 			    reg_offset != 23)
2319 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2320 
2321 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2322 			if (reg_offset != 7)
2323 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2324 
2325 		break;
2326 	case CHIP_FIJI:
2327 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2330 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2334 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2338 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2342 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2348 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2350 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2351 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2354 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2355 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2356 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2357 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2358 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2359 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2360 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2361 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2362 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2374 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2377 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2378 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2386 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2389 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2390 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2393 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2394 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2395 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2397 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2398 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2400 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2402 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2406 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2410 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2414 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2418 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2419 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2422 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2424 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2425 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2426 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2428 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2429 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2430 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2432 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2433 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2438 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2441 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2442 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2445 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2446 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2448 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2449 
2450 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453 				NUM_BANKS(ADDR_SURF_8_BANK));
2454 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457 				NUM_BANKS(ADDR_SURF_8_BANK));
2458 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461 				NUM_BANKS(ADDR_SURF_8_BANK));
2462 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2464 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465 				NUM_BANKS(ADDR_SURF_8_BANK));
2466 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469 				NUM_BANKS(ADDR_SURF_8_BANK));
2470 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473 				NUM_BANKS(ADDR_SURF_8_BANK));
2474 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477 				NUM_BANKS(ADDR_SURF_8_BANK));
2478 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2480 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481 				NUM_BANKS(ADDR_SURF_8_BANK));
2482 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2484 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2485 				NUM_BANKS(ADDR_SURF_8_BANK));
2486 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2488 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2489 				 NUM_BANKS(ADDR_SURF_8_BANK));
2490 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493 				 NUM_BANKS(ADDR_SURF_8_BANK));
2494 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2496 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497 				 NUM_BANKS(ADDR_SURF_8_BANK));
2498 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2501 				 NUM_BANKS(ADDR_SURF_8_BANK));
2502 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505 				 NUM_BANKS(ADDR_SURF_4_BANK));
2506 
2507 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2508 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2509 
2510 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2511 			if (reg_offset != 7)
2512 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2513 
2514 		break;
	case CHIP_TONGA:
		/*
		 * Tonga tiling configuration.
		 * modearray[0..30] are the GB_TILE_MODE0..30 values; Tonga uses
		 * an 8-pipe config (ADDR_SURF_P8_32x32_16x16) for most modes,
		 * with a 4-pipe (ADDR_SURF_P4_16x16) fallback for the second
		 * PRT variant of each group (entries 7, 12, 17, 23, 30).
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* entry 8 is linear (no tile split / micro tile mode fields) */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * mod2array[0..14] are the GB_MACROTILE_MODE0..14 values
		 * (bank width/height, macro tile aspect, bank count).
		 * Index 7 is deliberately never assigned; the write loop
		 * below skips it as well.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* program the tile mode registers */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* program the macrotile registers; offset 7 has no table entry
		 * above, so it is left untouched
		 */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
		/*
		 * Polaris11 tiling configuration.
		 * modearray[0..30] are the GB_TILE_MODE0..30 values; Polaris11
		 * uses a 4-pipe config (ADDR_SURF_P4_16x16) for every mode, so
		 * unlike the 8-pipe chips there is no distinct P4 fallback for
		 * the second PRT variants (entries 7, 12, 17, 23, 30 repeat
		 * their P4 siblings).
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* entry 8 is linear (no tile split / micro tile mode fields) */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * mod2array[0..14] are the GB_MACROTILE_MODE0..14 values
		 * (bank width/height, macro tile aspect, bank count).
		 * Index 7 is deliberately never assigned; the write loop
		 * below skips it as well. Note entries 8 and 9 use
		 * BANK_WIDTH_2, unlike the 8-pipe chips.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* program the tile mode registers */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* program the macrotile registers; offset 7 has no table entry
		 * above, so it is left untouched
		 */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2906 	case CHIP_POLARIS10:
2907 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2910 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2914 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2918 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2922 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2923 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2924 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2926 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2928 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2930 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2931 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2932 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2933 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2934 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2935 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2936 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2937 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2938 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2939 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2940 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2941 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2942 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2944 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2946 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2948 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2952 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2956 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2958 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2962 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2966 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2969 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2973 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2974 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2975 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2976 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2977 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2978 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2980 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2982 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2984 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2986 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2990 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2994 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2996 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2997 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2998 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2999 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3000 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3001 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3002 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3004 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3005 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3006 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3008 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3009 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3010 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3012 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3013 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3014 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3016 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3018 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3020 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3024 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3028 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3029 
3030 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3032 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033 				NUM_BANKS(ADDR_SURF_16_BANK));
3034 
3035 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3037 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3038 				NUM_BANKS(ADDR_SURF_16_BANK));
3039 
3040 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3042 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3043 				NUM_BANKS(ADDR_SURF_16_BANK));
3044 
3045 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3047 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048 				NUM_BANKS(ADDR_SURF_16_BANK));
3049 
3050 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3052 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3053 				NUM_BANKS(ADDR_SURF_16_BANK));
3054 
3055 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3057 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3058 				NUM_BANKS(ADDR_SURF_16_BANK));
3059 
3060 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3063 				NUM_BANKS(ADDR_SURF_16_BANK));
3064 
3065 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3067 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068 				NUM_BANKS(ADDR_SURF_16_BANK));
3069 
3070 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073 				NUM_BANKS(ADDR_SURF_16_BANK));
3074 
3075 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3077 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3078 				NUM_BANKS(ADDR_SURF_16_BANK));
3079 
3080 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083 				NUM_BANKS(ADDR_SURF_16_BANK));
3084 
3085 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3087 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3088 				NUM_BANKS(ADDR_SURF_8_BANK));
3089 
3090 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3091 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3092 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3093 				NUM_BANKS(ADDR_SURF_4_BANK));
3094 
3095 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3096 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3097 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3098 				NUM_BANKS(ADDR_SURF_4_BANK));
3099 
3100 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3101 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3102 
3103 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3104 			if (reg_offset != 7)
3105 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3106 
3107 		break;
3108 	case CHIP_STONEY:
3109 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110 				PIPE_CONFIG(ADDR_SURF_P2) |
3111 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3112 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114 				PIPE_CONFIG(ADDR_SURF_P2) |
3115 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3116 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118 				PIPE_CONFIG(ADDR_SURF_P2) |
3119 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3120 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3121 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 				PIPE_CONFIG(ADDR_SURF_P2) |
3123 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3124 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3125 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3126 				PIPE_CONFIG(ADDR_SURF_P2) |
3127 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3128 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3129 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3130 				PIPE_CONFIG(ADDR_SURF_P2) |
3131 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3132 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3133 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3134 				PIPE_CONFIG(ADDR_SURF_P2) |
3135 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3136 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3137 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3138 				PIPE_CONFIG(ADDR_SURF_P2));
3139 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3140 				PIPE_CONFIG(ADDR_SURF_P2) |
3141 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3142 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3144 				 PIPE_CONFIG(ADDR_SURF_P2) |
3145 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3146 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3147 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3148 				 PIPE_CONFIG(ADDR_SURF_P2) |
3149 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3150 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3151 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3152 				 PIPE_CONFIG(ADDR_SURF_P2) |
3153 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3155 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3156 				 PIPE_CONFIG(ADDR_SURF_P2) |
3157 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3158 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3160 				 PIPE_CONFIG(ADDR_SURF_P2) |
3161 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3162 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3164 				 PIPE_CONFIG(ADDR_SURF_P2) |
3165 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3166 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3167 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3168 				 PIPE_CONFIG(ADDR_SURF_P2) |
3169 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3172 				 PIPE_CONFIG(ADDR_SURF_P2) |
3173 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3176 				 PIPE_CONFIG(ADDR_SURF_P2) |
3177 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3180 				 PIPE_CONFIG(ADDR_SURF_P2) |
3181 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3182 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3183 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3184 				 PIPE_CONFIG(ADDR_SURF_P2) |
3185 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3186 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3188 				 PIPE_CONFIG(ADDR_SURF_P2) |
3189 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3190 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3191 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3192 				 PIPE_CONFIG(ADDR_SURF_P2) |
3193 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3194 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3195 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3196 				 PIPE_CONFIG(ADDR_SURF_P2) |
3197 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3198 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3199 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3200 				 PIPE_CONFIG(ADDR_SURF_P2) |
3201 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3202 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204 				 PIPE_CONFIG(ADDR_SURF_P2) |
3205 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3206 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3208 				 PIPE_CONFIG(ADDR_SURF_P2) |
3209 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3210 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3211 
3212 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3214 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215 				NUM_BANKS(ADDR_SURF_8_BANK));
3216 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3218 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219 				NUM_BANKS(ADDR_SURF_8_BANK));
3220 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223 				NUM_BANKS(ADDR_SURF_8_BANK));
3224 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3227 				NUM_BANKS(ADDR_SURF_8_BANK));
3228 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231 				NUM_BANKS(ADDR_SURF_8_BANK));
3232 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3234 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3235 				NUM_BANKS(ADDR_SURF_8_BANK));
3236 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3238 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3239 				NUM_BANKS(ADDR_SURF_8_BANK));
3240 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3241 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3242 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243 				NUM_BANKS(ADDR_SURF_16_BANK));
3244 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3245 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3246 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247 				NUM_BANKS(ADDR_SURF_16_BANK));
3248 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3249 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3250 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3251 				 NUM_BANKS(ADDR_SURF_16_BANK));
3252 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3253 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255 				 NUM_BANKS(ADDR_SURF_16_BANK));
3256 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3258 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 				 NUM_BANKS(ADDR_SURF_16_BANK));
3260 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3261 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3262 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3263 				 NUM_BANKS(ADDR_SURF_16_BANK));
3264 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3266 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3267 				 NUM_BANKS(ADDR_SURF_8_BANK));
3268 
3269 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3270 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3271 			    reg_offset != 23)
3272 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3273 
3274 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3275 			if (reg_offset != 7)
3276 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3277 
3278 		break;
3279 	default:
3280 		dev_warn(adev->dev,
3281 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3282 			 adev->asic_type);
3283 
3284 	case CHIP_CARRIZO:
3285 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3286 				PIPE_CONFIG(ADDR_SURF_P2) |
3287 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3288 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3290 				PIPE_CONFIG(ADDR_SURF_P2) |
3291 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3292 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294 				PIPE_CONFIG(ADDR_SURF_P2) |
3295 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3296 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3297 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3298 				PIPE_CONFIG(ADDR_SURF_P2) |
3299 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3300 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3301 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3302 				PIPE_CONFIG(ADDR_SURF_P2) |
3303 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3304 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3305 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3306 				PIPE_CONFIG(ADDR_SURF_P2) |
3307 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3308 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3309 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3310 				PIPE_CONFIG(ADDR_SURF_P2) |
3311 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3312 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3313 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3314 				PIPE_CONFIG(ADDR_SURF_P2));
3315 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3316 				PIPE_CONFIG(ADDR_SURF_P2) |
3317 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3318 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3320 				 PIPE_CONFIG(ADDR_SURF_P2) |
3321 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3322 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3323 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3324 				 PIPE_CONFIG(ADDR_SURF_P2) |
3325 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3326 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3327 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3328 				 PIPE_CONFIG(ADDR_SURF_P2) |
3329 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3330 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3331 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3332 				 PIPE_CONFIG(ADDR_SURF_P2) |
3333 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3334 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3336 				 PIPE_CONFIG(ADDR_SURF_P2) |
3337 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3338 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3339 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3340 				 PIPE_CONFIG(ADDR_SURF_P2) |
3341 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3342 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3343 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3344 				 PIPE_CONFIG(ADDR_SURF_P2) |
3345 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3348 				 PIPE_CONFIG(ADDR_SURF_P2) |
3349 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3352 				 PIPE_CONFIG(ADDR_SURF_P2) |
3353 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3356 				 PIPE_CONFIG(ADDR_SURF_P2) |
3357 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3358 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3359 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3360 				 PIPE_CONFIG(ADDR_SURF_P2) |
3361 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3362 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3363 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3364 				 PIPE_CONFIG(ADDR_SURF_P2) |
3365 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3366 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3367 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3368 				 PIPE_CONFIG(ADDR_SURF_P2) |
3369 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3370 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3371 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3372 				 PIPE_CONFIG(ADDR_SURF_P2) |
3373 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3374 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3375 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3376 				 PIPE_CONFIG(ADDR_SURF_P2) |
3377 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3378 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3379 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380 				 PIPE_CONFIG(ADDR_SURF_P2) |
3381 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3382 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3384 				 PIPE_CONFIG(ADDR_SURF_P2) |
3385 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3386 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3387 
3388 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3390 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391 				NUM_BANKS(ADDR_SURF_8_BANK));
3392 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3394 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3395 				NUM_BANKS(ADDR_SURF_8_BANK));
3396 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3399 				NUM_BANKS(ADDR_SURF_8_BANK));
3400 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3401 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3402 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3403 				NUM_BANKS(ADDR_SURF_8_BANK));
3404 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3406 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3407 				NUM_BANKS(ADDR_SURF_8_BANK));
3408 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3410 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3411 				NUM_BANKS(ADDR_SURF_8_BANK));
3412 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3414 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3415 				NUM_BANKS(ADDR_SURF_8_BANK));
3416 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3417 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3418 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419 				NUM_BANKS(ADDR_SURF_16_BANK));
3420 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3421 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3422 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3423 				NUM_BANKS(ADDR_SURF_16_BANK));
3424 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3425 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3426 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3427 				 NUM_BANKS(ADDR_SURF_16_BANK));
3428 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3429 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3430 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3431 				 NUM_BANKS(ADDR_SURF_16_BANK));
3432 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3433 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3434 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3435 				 NUM_BANKS(ADDR_SURF_16_BANK));
3436 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3437 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3438 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3439 				 NUM_BANKS(ADDR_SURF_16_BANK));
3440 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3442 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3443 				 NUM_BANKS(ADDR_SURF_8_BANK));
3444 
3445 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3446 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3447 			    reg_offset != 23)
3448 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3449 
3450 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3451 			if (reg_offset != 7)
3452 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3453 
3454 		break;
3455 	}
3456 }
3457 
3458 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3459 				  u32 se_num, u32 sh_num, u32 instance)
3460 {
3461 	u32 data;
3462 
3463 	if (instance == 0xffffffff)
3464 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3465 	else
3466 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3467 
3468 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3469 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3470 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3471 	} else if (se_num == 0xffffffff) {
3472 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3473 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3474 	} else if (sh_num == 0xffffffff) {
3475 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3476 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3477 	} else {
3478 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3479 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3480 	}
3481 	WREG32(mmGRBM_GFX_INDEX, data);
3482 }
3483 
3484 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3485 {
3486 	return (u32)((1ULL << bit_width) - 1);
3487 }
3488 
/*
 * Return a bitmap of the render backends (RBs) that are active for the
 * SE/SH currently selected through gfx_v8_0_select_se_sh().
 */
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* Merge fused-off (CC) and user-disabled (GC_USER) backend bits. */
	data = RREG32(mmCC_RB_BACKEND_DISABLE);
	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	/* NOTE(review): the mask is taken from CC_RB_BACKEND_DISABLE but the
	 * shift from GC_USER_RB_BACKEND_DISABLE; this only works because the
	 * BACKEND_DISABLE field presumably has the same layout in both
	 * registers — confirm against the gmc register headers. */
	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	/* Only the bits for this SH's share of the SE's backends are valid. */
	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	/* Registers hold "disabled" bits; invert to report "active" ones. */
	return (~data) & mask;
}
3504 
/*
 * Walk every SE/SH pair, collect its active-RB bitmap, and record the
 * combined backend enable mask and RB count in adev->gfx.config.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	/* Number of bitmap bits each SH contributes to the global mask. */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;

	/* Serialize GRBM_GFX_INDEX steering with other users. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			/* Select one SE/SH (instance broadcast) and read its RBs. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* Pack each SH's bitmap into its slot of the mask. */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* Restore broadcast mode so later writes reach all units. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}
3528 
3529 /**
3530  * gfx_v8_0_init_compute_vmid - gart enable
3531  *
3532  * @rdev: amdgpu_device pointer
3533  *
3534  * Initialize compute vmid sh_mem registers
3535  *
3536  */
3537 #define DEFAULT_SH_MEM_BASES	(0x6000)
3538 #define FIRST_COMPUTE_VMID	(8)
3539 #define LAST_COMPUTE_VMID	(16)
3540 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3541 {
3542 	int i;
3543 	uint32_t sh_mem_config;
3544 	uint32_t sh_mem_bases;
3545 
3546 	/*
3547 	 * Configure apertures:
3548 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3549 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3550 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3551 	 */
3552 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3553 
3554 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3555 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3556 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3557 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3558 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3559 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3560 
3561 	mutex_lock(&adev->srbm_mutex);
3562 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3563 		vi_srbm_select(adev, 0, 0, 0, i);
3564 		/* CP and shaders */
3565 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3566 		WREG32(mmSH_MEM_APE1_BASE, 1);
3567 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3568 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3569 	}
3570 	vi_srbm_select(adev, 0, 0, 0, 0);
3571 	mutex_unlock(&adev->srbm_mutex);
3572 }
3573 
/*
 * One-time GFX block initialization: GRBM timeout, address config,
 * tiling tables, RB/CU discovery, per-VMID SH_MEM setup and the
 * PA_SC primitive FIFO sizes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	/* Raise the GRBM read timeout to 0xff (units are hw-defined). */
	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* All three blocks must agree on the memory address configuration. */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel): uncached default/APE1 mtype. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* Other VMIDs: non-coherent cached mtype. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		/* APE1 disabled: base > limit */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Compute VMIDs (8..15) get their own aperture setup. */
	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
3640 
/*
 * Poll until the RLC serdes masters report idle: first the CU masters
 * of every SE/SH, then the non-CU masters. Each poll is bounded by
 * adev->usec_timeout microseconds; on timeout we simply fall through.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* Steer at each SE/SH in turn while polling its CU-master status. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* Restore broadcast selection before releasing the mutex. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the SE/GC/TC0/TC1 non-CU masters to go idle. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3670 
3671 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3672 					       bool enable)
3673 {
3674 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3675 
3676 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3677 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3678 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3679 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3680 
3681 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3682 }
3683 
3684 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3685 {
3686 	/* csib */
3687 	WREG32(mmRLC_CSIB_ADDR_HI,
3688 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3689 	WREG32(mmRLC_CSIB_ADDR_LO,
3690 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3691 	WREG32(mmRLC_CSIB_LENGTH,
3692 			adev->gfx.rlc.clear_state_size);
3693 }
3694 
/*
 * gfx_v8_0_parse_ind_reg_list - preprocess the RLC indirect register list
 *
 * @register_list_format: the raw format list; rewritten in place so that
 *                        each entry's index word becomes an index into
 *                        unique_indices
 * @ind_offset:           word offset at which the indirect entries start
 * @list_size:            total size of the list, in words
 * @unique_indices:       out: de-duplicated index values
 * @indices_count:        in/out: number of entries used in unique_indices
 * @max_indices:          capacity of unique_indices
 * @ind_start_offsets:    out: word offset where each sub-list starts
 * @offset_count:         in/out: number of entries used in ind_start_offsets
 * @max_offset:           capacity of ind_start_offsets
 *
 * Sub-lists are terminated by a 0xFFFFFFFF sentinel word. Overflow of
 * either output array is a firmware/driver mismatch and is fatal (BUG_ON).
 *
 * Fix vs. the previous version: both capacity checks now run BEFORE the
 * store, so an overflow can no longer corrupt memory past the array end
 * before being caught (the offsets check also no longer fired one entry
 * early, which rejected an exactly-full array).
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			/* check capacity before writing, not after */
			BUG_ON(*offset_count >= max_offset);
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip the two header words; the index word follows them */
		ind_offset += 2;

		/* look for a matching index already collected */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* check capacity before writing, not after */
			BUG_ON(*indices_count >= max_indices);
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
		}

		/* replace the raw index value with its slot number */
		register_list_format[ind_offset] = indices;
	}
}
3744 
/*
 * Upload the RLC save/restore lists: the direct register-restore list
 * into ARAM, the (preprocessed) indirect format list plus its metadata
 * into GPM scratch, and the de-duplicated register indices into the
 * SRM index control registers.
 *
 * Returns 0 on success, -ENOMEM if the scratch copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* Work on a copy: parsing rewrites the list in place. */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (register_list_format == NULL)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	/* Collect unique indices and sub-list start offsets; the copy's
	 * index words are replaced by slot numbers. */
	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	temp = RREG32(mmRLC_SRM_CNTL);
	temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
	WREG32(mmRLC_SRM_CNTL, temp);

	/* Stream the restore list into ARAM (address auto-increments). */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* Restore-list size in the hardware's unit (half the word count). */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	/* NOTE(review): each index value is split into a low part
	 * (& 0x3FFFF) for the ADDR register and a high part (>> 20) for
	 * the DATA register — presumably matching the packed encoding the
	 * parser collected; confirm against the RLC SRM register spec. */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
	}
	kfree(register_list_format);

	return 0;
}
3808 
3809 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3810 {
3811 	uint32_t data;
3812 
3813 	data = RREG32(mmRLC_SRM_CNTL);
3814 	data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3815 	WREG32(mmRLC_SRM_CNTL, data);
3816 }
3817 
/*
 * gfx_v8_0_init_power_gating - program static GFX power-gating parameters
 * @adev: amdgpu device pointer
 *
 * Only takes effect when any of the GFX PG modes (PG/SMG/DMG) is enabled
 * in adev->pg_flags.  Sets the RB wptr idle poll count, the four RLC
 * power up/down/propagate/mem-sleep delays, the serdes command delay and
 * the GRBM register-save idle threshold.  The numeric values are
 * hardware tuning constants; do not change them without HW guidance.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		/* poll count 0x60 before the CP considers the RB idle */
		data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		WREG32(mmCP_RB_WPTR_POLL_CNTL, data);

		/* RLC_PG_DELAY is written whole (not read-modify-write):
		 * all four delay fields get 0x10 */
		data = 0;
		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
		WREG32(mmRLC_PG_DELAY, data);

		/* serdes command delay = 0x3 */
		data = RREG32(mmRLC_PG_DELAY_2);
		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
		data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
		WREG32(mmRLC_PG_DELAY_2, data);

		/* GFX idle threshold before GRBM registers are saved */
		data = RREG32(mmRLC_AUTO_PG_CTRL);
		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
		WREG32(mmRLC_AUTO_PG_CTRL, data);
	}
}
3848 
3849 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3850 						bool enable)
3851 {
3852 	u32 data, orig;
3853 
3854 	orig = data = RREG32(mmRLC_PG_CNTL);
3855 
3856 	if (enable)
3857 		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3858 	else
3859 		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3860 
3861 	if (orig != data)
3862 		WREG32(mmRLC_PG_CNTL, data);
3863 }
3864 
3865 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3866 						  bool enable)
3867 {
3868 	u32 data, orig;
3869 
3870 	orig = data = RREG32(mmRLC_PG_CNTL);
3871 
3872 	if (enable)
3873 		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3874 	else
3875 		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3876 
3877 	if (orig != data)
3878 		WREG32(mmRLC_PG_CNTL, data);
3879 }
3880 
3881 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3882 {
3883 	u32 data, orig;
3884 
3885 	orig = data = RREG32(mmRLC_PG_CNTL);
3886 
3887 	if (enable)
3888 		data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3889 	else
3890 		data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3891 
3892 	if (orig != data)
3893 		WREG32(mmRLC_PG_CNTL, data);
3894 }
3895 
3896 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3897 {
3898 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3899 			      AMD_PG_SUPPORT_GFX_SMG |
3900 			      AMD_PG_SUPPORT_GFX_DMG |
3901 			      AMD_PG_SUPPORT_CP |
3902 			      AMD_PG_SUPPORT_GDS |
3903 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3904 		gfx_v8_0_init_csb(adev);
3905 		gfx_v8_0_init_save_restore_list(adev);
3906 		gfx_v8_0_enable_save_restore_machine(adev);
3907 
3908 		if ((adev->asic_type == CHIP_CARRIZO) ||
3909 		    (adev->asic_type == CHIP_STONEY)) {
3910 			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3911 			gfx_v8_0_init_power_gating(adev);
3912 			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3913 			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3914 				cz_enable_sck_slow_down_on_power_up(adev, true);
3915 				cz_enable_sck_slow_down_on_power_down(adev, true);
3916 			} else {
3917 				cz_enable_sck_slow_down_on_power_up(adev, false);
3918 				cz_enable_sck_slow_down_on_power_down(adev, false);
3919 			}
3920 			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3921 				cz_enable_cp_power_gating(adev, true);
3922 			else
3923 				cz_enable_cp_power_gating(adev, false);
3924 		} else if (adev->asic_type == CHIP_POLARIS11) {
3925 			gfx_v8_0_init_power_gating(adev);
3926 		}
3927 	}
3928 }
3929 
/*
 * gfx_v8_0_rlc_stop - halt the RLC micro engine
 * @adev: amdgpu device pointer
 *
 * Clears RLC_ENABLE_F32, disables the GUI idle interrupt and waits for
 * the RLC serdes to settle (gfx_v8_0_wait_for_rlc_serdes).
 * NOTE(review): deliberately not static, unlike the neighbouring helpers —
 * presumably referenced from another file; confirm before changing linkage.
 */
void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32(mmRLC_CNTL, tmp);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	gfx_v8_0_wait_for_rlc_serdes(adev);
}
3941 
3942 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3943 {
3944 	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3945 
3946 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3947 	WREG32(mmGRBM_SOFT_RESET, tmp);
3948 	udelay(50);
3949 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3950 	WREG32(mmGRBM_SOFT_RESET, tmp);
3951 	udelay(50);
3952 }
3953 
3954 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3955 {
3956 	u32 tmp = RREG32(mmRLC_CNTL);
3957 
3958 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3959 	WREG32(mmRLC_CNTL, tmp);
3960 
3961 	/* carrizo do enable cp interrupt after cp inited */
3962 	if (!(adev->flags & AMD_IS_APU))
3963 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3964 
3965 	udelay(50);
3966 }
3967 
3968 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3969 {
3970 	const struct rlc_firmware_header_v2_0 *hdr;
3971 	const __le32 *fw_data;
3972 	unsigned i, fw_size;
3973 
3974 	if (!adev->gfx.rlc_fw)
3975 		return -EINVAL;
3976 
3977 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3978 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3979 
3980 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3981 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3982 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3983 
3984 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3985 	for (i = 0; i < fw_size; i++)
3986 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3987 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3988 
3989 	return 0;
3990 }
3991 
/*
 * gfx_v8_0_rlc_resume - full RLC bring-up sequence
 * @adev: amdgpu device pointer
 *
 * Stops the RLC, disables clock- and power-gating, soft-resets the RLC,
 * re-applies the power-gating setup and loads the RLC microcode (either
 * directly or via the SMU loader) before starting the RLC again.
 * Returns 0 on success or a negative error code on ucode-load failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
	/* Polaris parts have a separate 3D CGCG/CGLS control */
	if (adev->asic_type == CHIP_POLARIS11 ||
		adev->asic_type == CHIP_POLARIS10)
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU-driven loading: just wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4029 
4030 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4031 {
4032 	int i;
4033 	u32 tmp = RREG32(mmCP_ME_CNTL);
4034 
4035 	if (enable) {
4036 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4037 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4038 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4039 	} else {
4040 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4041 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4042 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4043 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4044 			adev->gfx.gfx_ring[i].ready = false;
4045 	}
4046 	WREG32(mmCP_ME_CNTL, tmp);
4047 	udelay(50);
4048 }
4049 
4050 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4051 {
4052 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4053 	const struct gfx_firmware_header_v1_0 *ce_hdr;
4054 	const struct gfx_firmware_header_v1_0 *me_hdr;
4055 	const __le32 *fw_data;
4056 	unsigned i, fw_size;
4057 
4058 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4059 		return -EINVAL;
4060 
4061 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4062 		adev->gfx.pfp_fw->data;
4063 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4064 		adev->gfx.ce_fw->data;
4065 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4066 		adev->gfx.me_fw->data;
4067 
4068 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4069 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4070 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4071 
4072 	gfx_v8_0_cp_gfx_enable(adev, false);
4073 
4074 	/* PFP */
4075 	fw_data = (const __le32 *)
4076 		(adev->gfx.pfp_fw->data +
4077 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4078 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4079 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4080 	for (i = 0; i < fw_size; i++)
4081 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4082 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4083 
4084 	/* CE */
4085 	fw_data = (const __le32 *)
4086 		(adev->gfx.ce_fw->data +
4087 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4088 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4089 	WREG32(mmCP_CE_UCODE_ADDR, 0);
4090 	for (i = 0; i < fw_size; i++)
4091 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4092 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4093 
4094 	/* ME */
4095 	fw_data = (const __le32 *)
4096 		(adev->gfx.me_fw->data +
4097 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4098 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4099 	WREG32(mmCP_ME_RAM_WADDR, 0);
4100 	for (i = 0; i < fw_size; i++)
4101 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4102 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4103 
4104 	return 0;
4105 }
4106 
4107 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4108 {
4109 	u32 count = 0;
4110 	const struct cs_section_def *sect = NULL;
4111 	const struct cs_extent_def *ext = NULL;
4112 
4113 	/* begin clear state */
4114 	count += 2;
4115 	/* context control state */
4116 	count += 3;
4117 
4118 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4119 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4120 			if (sect->id == SECT_CONTEXT)
4121 				count += 2 + ext->reg_count;
4122 			else
4123 				return 0;
4124 		}
4125 	}
4126 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4127 	count += 4;
4128 	/* end clear state */
4129 	count += 2;
4130 	/* clear state */
4131 	count += 2;
4132 
4133 	return count;
4134 }
4135 
/*
 * gfx_v8_0_cp_gfx_start - initialize the CP and emit the clear-state stream
 * @adev: amdgpu device pointer
 *
 * Programs basic CP registers, releases the gfx engines and submits the
 * clear-state packet sequence on gfx ring 0.  The packets emitted here
 * must stay in sync with gfx_v8_0_get_csb_size(), which sizes the ring
 * allocation below.  Returns 0 on success or the ring-lock error code.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* csb size + 4 dwords for the trailing SET_BASE packet */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay every context-register extent from the static cs table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* Topaz raster config depends on the render-backend count */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4227 
/*
 * gfx_v8_0_cp_gfx_resume - bring up gfx ring 0
 * @adev: amdgpu device pointer
 *
 * Programs the ring-buffer size, read/write pointers, rptr writeback
 * address, ring base and (except on Topaz) the doorbell, then starts the
 * CP via gfx_v8_0_cp_gfx_start() and runs a ring test.  The register
 * write order here is deliberate (RPTR_WR_ENA is held while pointers are
 * reset, then released after a 1 ms settle).  Returns the ring-test
 * result, 0 on success.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the pointer reset settle before dropping RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* Tonga additionally fences the gfx doorbell range */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
4310 
4311 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4312 {
4313 	int i;
4314 
4315 	if (enable) {
4316 		WREG32(mmCP_MEC_CNTL, 0);
4317 	} else {
4318 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4319 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4320 			adev->gfx.compute_ring[i].ready = false;
4321 	}
4322 	udelay(50);
4323 }
4324 
4325 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4326 {
4327 	const struct gfx_firmware_header_v1_0 *mec_hdr;
4328 	const __le32 *fw_data;
4329 	unsigned i, fw_size;
4330 
4331 	if (!adev->gfx.mec_fw)
4332 		return -EINVAL;
4333 
4334 	gfx_v8_0_cp_compute_enable(adev, false);
4335 
4336 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4337 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4338 
4339 	fw_data = (const __le32 *)
4340 		(adev->gfx.mec_fw->data +
4341 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4342 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4343 
4344 	/* MEC1 */
4345 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4346 	for (i = 0; i < fw_size; i++)
4347 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4348 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4349 
4350 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4351 	if (adev->gfx.mec2_fw) {
4352 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4353 
4354 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4355 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4356 
4357 		fw_data = (const __le32 *)
4358 			(adev->gfx.mec2_fw->data +
4359 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4360 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4361 
4362 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4363 		for (i = 0; i < fw_size; i++)
4364 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4365 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4366 	}
4367 
4368 	return 0;
4369 }
4370 
/*
 * struct vi_mqd - VI memory queue descriptor (MQD), one 512-dword page.
 *
 * Hardware/firmware-defined layout: field order, names and the trailing
 * reserved dwords mirror the CP's MQD format one dword per ordinal, so
 * nothing here may be reordered, resized or removed.  The cp_hqd_* and
 * cp_mqd_* fields shadow the identically named CP registers.
 */
struct vi_mqd {
	/* dispatch state (ordinals 0-31) */
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	/* user SGPR data (ordinals 65-80) */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	/* CP bookkeeping / timing (ordinals 81-127) */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	/* HQD register shadow (ordinals 128-181) */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	/* IQ timer packet staging area (ordinals 192-224) */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	/* SET_RESOURCES packet staging area (ordinals 228-235) */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
4631 
4632 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4633 {
4634 	int i, r;
4635 
4636 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4637 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4638 
4639 		if (ring->mqd_obj) {
4640 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4641 			if (unlikely(r != 0))
4642 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4643 
4644 			amdgpu_bo_unpin(ring->mqd_obj);
4645 			amdgpu_bo_unreserve(ring->mqd_obj);
4646 
4647 			amdgpu_bo_unref(&ring->mqd_obj);
4648 			ring->mqd_obj = NULL;
4649 		}
4650 	}
4651 }
4652 
/*
 * gfx_v8_0_cp_compute_resume - bring up the compute (MEC) queues
 *
 * @adev: amdgpu device pointer
 *
 * Programs the EOP buffer for every MEC pipe, then, for each compute
 * ring, allocates/maps an MQD (memory queue descriptor) in GTT, fills
 * it in, mirrors it into the HQD registers and activates the queue.
 * Finishes by enabling the MEC and ring-testing every compute ring.
 * Returns 0 on success or a negative error code on BO failures.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0..3 belong to ME 1, pipes 4..7 to ME 2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* the EOP base register takes a 256-byte aligned address */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily allocate the MQD BO; it is reused across resumes */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* NOTE(review): header/misc magic values come from the CP MQD
		 * layout — confirm against the hardware documentation. */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* poll until the HQD reports inactive (1us steps) */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* these ASICs also need the MEC doorbell aperture set */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* mark the rings ready and sanity-check each with a ring test */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
4911 
4912 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4913 {
4914 	int r;
4915 
4916 	if (!(adev->flags & AMD_IS_APU))
4917 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4918 
4919 	if (!adev->pp_enabled) {
4920 		if (!adev->firmware.smu_load) {
4921 			/* legacy firmware loading */
4922 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
4923 			if (r)
4924 				return r;
4925 
4926 			r = gfx_v8_0_cp_compute_load_microcode(adev);
4927 			if (r)
4928 				return r;
4929 		} else {
4930 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4931 							AMDGPU_UCODE_ID_CP_CE);
4932 			if (r)
4933 				return -EINVAL;
4934 
4935 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4936 							AMDGPU_UCODE_ID_CP_PFP);
4937 			if (r)
4938 				return -EINVAL;
4939 
4940 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4941 							AMDGPU_UCODE_ID_CP_ME);
4942 			if (r)
4943 				return -EINVAL;
4944 
4945 			if (adev->asic_type == CHIP_TOPAZ) {
4946 				r = gfx_v8_0_cp_compute_load_microcode(adev);
4947 				if (r)
4948 					return r;
4949 			} else {
4950 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4951 										 AMDGPU_UCODE_ID_CP_MEC1);
4952 				if (r)
4953 					return -EINVAL;
4954 			}
4955 		}
4956 	}
4957 
4958 	r = gfx_v8_0_cp_gfx_resume(adev);
4959 	if (r)
4960 		return r;
4961 
4962 	r = gfx_v8_0_cp_compute_resume(adev);
4963 	if (r)
4964 		return r;
4965 
4966 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4967 
4968 	return 0;
4969 }
4970 
/* Enable or disable both command-processor front ends (GFX and compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4976 
/* hw_init hook: program golden registers, initialize the core, then
 * bring up the RLC followed by the CP.  Returns 0 on success.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the CP depends on a running RLC, so resume that first */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4996 
/* hw_fini hook: quiesce the GFX block — drop the privileged-access IRQ
 * references, stop the CP front ends and the RLC, free the compute MQD
 * BOs, and leave power gating released so a later hw_init starts from
 * an ungated state.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* balances the amdgpu_irq_get() calls made in late_init */
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5012 
/* Suspend hook: a suspend is a full hw teardown for this IP block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5019 
/* Resume hook: a resume is a full hw (re)initialization. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5026 
5027 static bool gfx_v8_0_is_idle(void *handle)
5028 {
5029 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5030 
5031 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5032 		return false;
5033 	else
5034 		return true;
5035 }
5036 
5037 static int gfx_v8_0_wait_for_idle(void *handle)
5038 {
5039 	unsigned i;
5040 	u32 tmp;
5041 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5042 
5043 	for (i = 0; i < adev->usec_timeout; i++) {
5044 		/* read MC_STATUS */
5045 		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
5046 
5047 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5048 			return 0;
5049 		udelay(1);
5050 	}
5051 	return -ETIMEDOUT;
5052 }
5053 
/*
 * gfx_v8_0_soft_reset - soft-reset GFX sub-blocks that report busy
 *
 * @handle: amdgpu_device pointer
 *
 * Inspects GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS to decide which reset
 * bits (CP, GFX, RLC, GRBM) are needed; then, with the RLC and both CP
 * front ends stopped, pulses the GRBM/SRBM soft-reset registers while
 * the GFX path into the memory controller is stalled via GMCON_DEBUG.
 * Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		/* any pipeline unit busy -> reset both CP and GFX */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	/* CP busy additionally requires a GRBM reset via SRBM */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		/* NOTE(review): this nested condition is always true inside
		 * the outer guard; kept as-is. */
		if (grbm_soft_reset || srbm_soft_reset) {
			/* stall and clear the GFX path into the MC */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		if (grbm_soft_reset) {
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			/* read back — presumably to post the write; confirm */
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			/* release only the bits we asserted */
			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		if (grbm_soft_reset || srbm_soft_reset) {
			/* un-stall the GFX->MC path again */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}
5156 
5157 /**
5158  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5159  *
5160  * @adev: amdgpu_device pointer
5161  *
5162  * Fetches a GPU clock counter snapshot.
5163  * Returns the 64 bit clock counter snapshot.
5164  */
5165 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5166 {
5167 	uint64_t clock;
5168 
5169 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5170 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5171 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5172 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5173 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5174 	return clock;
5175 }
5176 
5177 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5178 					  uint32_t vmid,
5179 					  uint32_t gds_base, uint32_t gds_size,
5180 					  uint32_t gws_base, uint32_t gws_size,
5181 					  uint32_t oa_base, uint32_t oa_size)
5182 {
5183 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5184 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5185 
5186 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5187 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5188 
5189 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5190 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5191 
5192 	/* GDS Base */
5193 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5194 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5195 				WRITE_DATA_DST_SEL(0)));
5196 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5197 	amdgpu_ring_write(ring, 0);
5198 	amdgpu_ring_write(ring, gds_base);
5199 
5200 	/* GDS Size */
5201 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5202 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5203 				WRITE_DATA_DST_SEL(0)));
5204 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5205 	amdgpu_ring_write(ring, 0);
5206 	amdgpu_ring_write(ring, gds_size);
5207 
5208 	/* GWS */
5209 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5210 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5211 				WRITE_DATA_DST_SEL(0)));
5212 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5213 	amdgpu_ring_write(ring, 0);
5214 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5215 
5216 	/* OA */
5217 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5218 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5219 				WRITE_DATA_DST_SEL(0)));
5220 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5221 	amdgpu_ring_write(ring, 0);
5222 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5223 }
5224 
/* GFX helper callbacks installed on adev->gfx.funcs in early_init */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};
5229 
/* early_init hook: record the fixed ring counts for gfx v8 and install
 * the ring/irq/GDS/RLC callback tables before any hardware is touched.
 * Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5244 
/* late_init hook: take references on the privileged-access interrupts,
 * run the EDC GPR workarounds (they submit IBs, hence late init), then
 * gate GFX power.  Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	/* NOTE(review): failures below return without putting the irq refs
	 * taken here; hw_fini puts both unconditionally — confirm the
	 * teardown path always runs after a late_init failure. */
	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5268 
5269 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5270 						       bool enable)
5271 {
5272 	uint32_t data, temp;
5273 
5274 	if (adev->asic_type == CHIP_POLARIS11)
5275 		/* Send msg to SMU via Powerplay */
5276 		amdgpu_set_powergating_state(adev,
5277 					     AMD_IP_BLOCK_TYPE_SMC,
5278 					     enable ?
5279 					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5280 
5281 	temp = data = RREG32(mmRLC_PG_CNTL);
5282 	/* Enable static MGPG */
5283 	if (enable)
5284 		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5285 	else
5286 		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5287 
5288 	if (temp != data)
5289 		WREG32(mmRLC_PG_CNTL, data);
5290 }
5291 
5292 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5293 							bool enable)
5294 {
5295 	uint32_t data, temp;
5296 
5297 	temp = data = RREG32(mmRLC_PG_CNTL);
5298 	/* Enable dynamic MGPG */
5299 	if (enable)
5300 		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5301 	else
5302 		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5303 
5304 	if (temp != data)
5305 		WREG32(mmRLC_PG_CNTL, data);
5306 }
5307 
5308 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5309 		bool enable)
5310 {
5311 	uint32_t data, temp;
5312 
5313 	temp = data = RREG32(mmRLC_PG_CNTL);
5314 	/* Enable quick PG */
5315 	if (enable)
5316 		data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5317 	else
5318 		data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5319 
5320 	if (temp != data)
5321 		WREG32(mmRLC_PG_CNTL, data);
5322 }
5323 
5324 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5325 					  bool enable)
5326 {
5327 	u32 data, orig;
5328 
5329 	orig = data = RREG32(mmRLC_PG_CNTL);
5330 
5331 	if (enable)
5332 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5333 	else
5334 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5335 
5336 	if (orig != data)
5337 		WREG32(mmRLC_PG_CNTL, data);
5338 }
5339 
5340 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5341 						bool enable)
5342 {
5343 	u32 data, orig;
5344 
5345 	orig = data = RREG32(mmRLC_PG_CNTL);
5346 
5347 	if (enable)
5348 		data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5349 	else
5350 		data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5351 
5352 	if (orig != data)
5353 		WREG32(mmRLC_PG_CNTL, data);
5354 
5355 	/* Read any GFX register to wake up GFX. */
5356 	if (!enable)
5357 		data = RREG32(mmDB_RENDER_CONTROL);
5358 }
5359 
5360 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5361 					  bool enable)
5362 {
5363 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5364 		cz_enable_gfx_cg_power_gating(adev, true);
5365 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5366 			cz_enable_gfx_pipeline_power_gating(adev, true);
5367 	} else {
5368 		cz_enable_gfx_cg_power_gating(adev, false);
5369 		cz_enable_gfx_pipeline_power_gating(adev, false);
5370 	}
5371 }
5372 
5373 static int gfx_v8_0_set_powergating_state(void *handle,
5374 					  enum amd_powergating_state state)
5375 {
5376 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5377 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5378 
5379 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5380 		return 0;
5381 
5382 	switch (adev->asic_type) {
5383 	case CHIP_CARRIZO:
5384 	case CHIP_STONEY:
5385 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5386 			cz_update_gfx_cg_power_gating(adev, enable);
5387 
5388 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5389 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5390 		else
5391 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5392 
5393 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5394 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5395 		else
5396 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5397 		break;
5398 	case CHIP_POLARIS11:
5399 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5400 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5401 		else
5402 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5403 
5404 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5405 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5406 		else
5407 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5408 
5409 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5410 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5411 		else
5412 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5413 		break;
5414 	default:
5415 		break;
5416 	}
5417 
5418 	return 0;
5419 }
5420 
/* Broadcast a BPM serdes command to all CUs on all SEs/SHs.
 *
 * @reg_addr: value for the REG_ADDR field of RLC_SERDES_WR_CTRL
 * @cmd: value for the BPM_DATA field
 *
 * Selects every SE/SH instance and opens both serdes master masks, then
 * rewrites RLC_SERDES_WR_CTRL with the command fields replaced.
 * NOTE(review): the broadcast SE/SH selection is left in place on
 * return; callers appear responsible for restoring it — confirm.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	/* NOTE(review): the Stoney mask list omits BPM_DATA and REG_ADDR, so
	 * those fields are OR-ed into whatever was read back — confirm this
	 * divergence from the generic path is intentional. */
	if (adev->asic_type == CHIP_STONEY)
			data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5461 
5462 #define MSG_ENTER_RLC_SAFE_MODE     1
5463 #define MSG_EXIT_RLC_SAFE_MODE      0
5464 
5465 #define RLC_GPR_REG2__REQ_MASK           0x00000001
5466 #define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
5467 #define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5468 
/* Request RLC safe mode (Carrizo/Stoney flavour).
 *
 * Sends MSG_ENTER_RLC_SAFE_MODE through RLC_GPR_REG2, then polls until
 * the GPM reports both GFX clock and power up and until the REQ bit is
 * acked (cleared).  Does nothing when the RLC F32 core is disabled or
 * when no CG/PG feature that needs the handshake is enabled.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* NOTE(review): 'data' still carries RLC_CNTL bits when it is
		 * written to RLC_GPR_REG2 below — confirm this is intended. */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait until GFX clock and power are both reported up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the request (REQ bit clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5504 
/* Release RLC safe mode (Carrizo/Stoney flavour) by sending
 * MSG_EXIT_RLC_SAFE_MODE through RLC_GPR_REG2.  The REQ-bit ack poll at
 * the end runs unconditionally, even when no message was sent.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* NOTE(review): 'data' still carries RLC_CNTL bits when it is
		 * written to RLC_GPR_REG2 below — confirm this is intended. */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the REQ bit to clear */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
			break;
		udelay(1);
	}
}
5530 
/* Request RLC safe mode via the RLC_SAFE_MODE register (Iceland
 * flavour).  Acts only when the RLC F32 core is running and CGCG/MGCG
 * is enabled: sets CMD with message 1, then polls for GFX clock/power
 * status and for the CMD bit to clear (the ack).
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* NOTE(review): 'data' still carries RLC_CNTL bits when it is
		 * written to RLC_SAFE_MODE below — confirm this is intended. */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until GFX clock and power are both reported up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack (CMD bit clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5564 
/* Leave RLC safe mode (Iceland flavour): write CMD with a zero message,
 * then wait for the CMD bit to clear.  The ack poll runs even when no
 * request was sent.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* only send the exit message if we actually entered */
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the CMD bit to clear */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
			break;
		udelay(1);
	}
}
5589 
/* No-op enter hook for ASICs that need no RLC safe-mode handshake;
 * only tracks the bookkeeping flag.
 */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5594 
/* No-op exit hook matching gfx_v8_0_nop_enter_rlc_safe_mode(). */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5599 
/* RLC safe-mode hooks for Carrizo/Stoney (RLC_GPR_REG2 message interface). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5604 
/* RLC safe-mode hooks for Topaz/Iceland (RLC_SAFE_MODE CMD interface). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5609 
/* Fallback hooks for ASICs that require no safe-mode handshake. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5614 
/*
 * Enable or disable GFX medium grain clock gating (MGCG) plus the related
 * memory light-sleep features (RLC/CP MGLS, CGTS).  All register writes are
 * bracketed by RLC safe mode, and the numbered steps must stay in order:
 * the serdes idle waits separate the override writes from the BPM serdes
 * commands.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				/* 1 - RLC memory Light sleep */
				temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmRLC_MEM_SLP_CNTL, data);
			}

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				/* 2 - CP memory Light sleep */
				temp = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override set; dGPUs clear it as well. */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5727 
/*
 * Enable or disable GFX coarse grain clock gating (CGCG) and, when
 * supported, coarse grain light sleep (CGLS).  Runs under RLC safe mode;
 * the serdes waits between steps are required ordering.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg (multiple reads on purpose) */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5818 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5819 					    bool enable)
5820 {
5821 	if (enable) {
5822 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5823 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5824 		 */
5825 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5826 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5827 	} else {
5828 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5829 		 * ===  CGCG + CGLS ===
5830 		 */
5831 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5832 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5833 	}
5834 	return 0;
5835 }
5836 
5837 static int gfx_v8_0_set_clockgating_state(void *handle,
5838 					  enum amd_clockgating_state state)
5839 {
5840 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5841 
5842 	switch (adev->asic_type) {
5843 	case CHIP_FIJI:
5844 	case CHIP_CARRIZO:
5845 	case CHIP_STONEY:
5846 		gfx_v8_0_update_gfx_clock_gating(adev,
5847 						 state == AMD_CG_STATE_GATE ? true : false);
5848 		break;
5849 	default:
5850 		break;
5851 	}
5852 	return 0;
5853 }
5854 
5855 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5856 {
5857 	u32 rptr;
5858 
5859 	rptr = ring->adev->wb.wb[ring->rptr_offs];
5860 
5861 	return rptr;
5862 }
5863 
5864 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5865 {
5866 	struct amdgpu_device *adev = ring->adev;
5867 	u32 wptr;
5868 
5869 	if (ring->use_doorbell)
5870 		/* XXX check if swapping is necessary on BE */
5871 		wptr = ring->adev->wb.wb[ring->wptr_offs];
5872 	else
5873 		wptr = RREG32(mmCP_RB0_WPTR);
5874 
5875 	return wptr;
5876 }
5877 
/*
 * Publish the gfx ring's new write pointer to the CP, either by doorbell
 * (writeback slot first, then the doorbell write) or via CP_RB0_WPTR.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back to flush the posted register write */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
5891 
/*
 * Emit an HDP flush: a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ
 * and polls GPU_HDP_FLUSH_DONE until the per-client bit matches.  The
 * ref/mask bit is selected by the ring's ME/pipe (CP0 for gfx, CP2/CP6
 * blocks for MEC1/MEC2 compute pipes).
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* amdgpu only drives ME 1/2 for compute */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
5923 
/*
 * Emit an HDP cache invalidate by writing 1 to HDP_DEBUG0 through a
 * confirmed WRITE_DATA packet.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0); /* register write, upper addr unused */
	amdgpu_ring_write(ring, 1); /* value to write */

}
5935 
/*
 * Emit an indirect buffer on the gfx ring.  CE IBs use
 * INDIRECT_BUFFER_CONST; otherwise a normal INDIRECT_BUFFER is emitted.
 * The VM id is packed into bits 24+ of the control dword.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* request CP byte swap of the IB on big-endian hosts */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC)); /* IBs are 4-byte aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5964 
/*
 * Emit an indirect buffer on a compute ring.  Unlike the gfx variant,
 * no SWITCH_BUFFER/CE handling is needed; the control dword also carries
 * INDIRECT_BUFFER_VALID.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  /* request CP byte swap of the IB */
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5980 
/*
 * Emit a fence on the gfx ring via EVENT_WRITE_EOP: flush TC/TCL1 caches,
 * write the 32- or 64-bit sequence number to @addr, and optionally raise
 * an interrupt (per @flags).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* fence addr is 4-byte aligned */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6001 
/*
 * Emit a pipeline sync: wait (in the CP) until the ring's own fence
 * memory reaches the latest synced sequence number.  On the gfx ring the
 * wait runs on the PFP and is followed by SWITCH_BUFFER packets.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask: full dword */
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
6026 
/*
 * Emit a VM TLB flush for @vm_id: update the context's page table base
 * register, trigger an invalidate via VM_INVALIDATE_REQUEST, and wait for
 * it to complete.  On the gfx ring a PFP_SYNC_ME plus SWITCH_BUFFERs
 * follow so the PFP doesn't prefetch through stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12); /* base is stored as a page frame */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
6077 
6078 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
6079 {
6080 	return ring->adev->wb.wb[ring->rptr_offs];
6081 }
6082 
6083 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6084 {
6085 	return ring->adev->wb.wb[ring->wptr_offs];
6086 }
6087 
/*
 * Publish a compute ring's write pointer: update the writeback slot
 * first, then ring the doorbell (compute rings always use doorbells).
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
6096 
/*
 * Emit a fence on a compute ring via RELEASE_MEM: flush TC/TCL1 caches,
 * write the 32- or 64-bit sequence to @addr, and optionally raise an
 * interrupt (per @flags).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6117 
6118 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6119 						 enum amdgpu_interrupt_state state)
6120 {
6121 	u32 cp_int_cntl;
6122 
6123 	switch (state) {
6124 	case AMDGPU_IRQ_STATE_DISABLE:
6125 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6126 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6127 					    TIME_STAMP_INT_ENABLE, 0);
6128 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6129 		break;
6130 	case AMDGPU_IRQ_STATE_ENABLE:
6131 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6132 		cp_int_cntl =
6133 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6134 				      TIME_STAMP_INT_ENABLE, 1);
6135 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6136 		break;
6137 	default:
6138 		break;
6139 	}
6140 }
6141 
/*
 * Enable or disable the EOP (timestamp) interrupt for a single MEC pipe.
 * Only ME1/pipe0 is programmable here; amdkfd owns the other pipes.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
6185 
6186 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6187 					     struct amdgpu_irq_src *source,
6188 					     unsigned type,
6189 					     enum amdgpu_interrupt_state state)
6190 {
6191 	u32 cp_int_cntl;
6192 
6193 	switch (state) {
6194 	case AMDGPU_IRQ_STATE_DISABLE:
6195 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6196 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6197 					    PRIV_REG_INT_ENABLE, 0);
6198 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6199 		break;
6200 	case AMDGPU_IRQ_STATE_ENABLE:
6201 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6202 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6203 					    PRIV_REG_INT_ENABLE, 1);
6204 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6205 		break;
6206 	default:
6207 		break;
6208 	}
6209 
6210 	return 0;
6211 }
6212 
/*
 * irq .set hook: toggle the privileged-instruction fault interrupt
 * (PRIV_INSTR_INT_ENABLE in CP_INT_CNTL_RING0).  Always returns 0.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	u32 cp_int_cntl;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    PRIV_INSTR_INT_ENABLE, 0);
		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    PRIV_INSTR_INT_ENABLE, 1);
		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
		break;
	default:
		break;
	}

	return 0;
}
6239 
/*
 * irq .set hook dispatching an EOP interrupt request to the gfx ring or
 * the matching (me, pipe) of a compute ring.  Always returns 0; unknown
 * types are ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6278 
/*
 * EOP interrupt handler.  Decodes ME/pipe/queue from the IV ring_id
 * (me = bits [3:2], pipe = bits [1:0], queue = bits [6:4]) and signals
 * fence completion on the matching ring.  Always returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* ME 0 is the gfx ring */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6310 
/*
 * Privileged register fault handler: log and schedule a GPU reset.
 * Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6319 
/*
 * Privileged instruction fault handler: log and schedule a GPU reset.
 * Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6328 
/* IP-block callbacks for the GFX 8.0 engine (init/fini, power/clock gating). */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6345 
/* Ring callbacks for the gfx (graphics) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6363 
/* Ring callbacks for the MEC compute rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6381 
6382 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6383 {
6384 	int i;
6385 
6386 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6387 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6388 
6389 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6390 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6391 }
6392 
/* EOP interrupt source: per-ring enable plus the fence-processing handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6397 
/* Privileged register fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6402 
/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6407 
/* Register the GFX interrupt sources (EOP and the two fault sources). */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6419 
/* Pick the per-ASIC RLC safe-mode implementation (nop where not needed). */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
		break;
	case CHIP_STONEY:
	case CHIP_CARRIZO:
		adev->gfx.rlc.funcs = &cz_rlc_funcs;
		break;
	default:
		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
		break;
	}
}
6435 
/*
 * Initialize GDS (global data share) sizing: total sizes come from the
 * hardware (GDS_VMID0_SIZE) or are fixed, and the gfx/CS partition sizes
 * are chosen based on whether the chip exposes a 64KB GDS.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6463 
6464 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6465 						 u32 bitmap)
6466 {
6467 	u32 data;
6468 
6469 	if (!bitmap)
6470 		return;
6471 
6472 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6473 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6474 
6475 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6476 }
6477 
/*
 * Return the active-CU bitmap for the currently selected SE/SH: CUs
 * disabled either by fuses (CC_*) or by the user (GC_USER_*) are masked
 * out, limited to max_cu_per_sh bits.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* combine fuse-disabled and user-disabled CU bits */
	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	/* invert: register holds inactive CUs, caller wants active ones */
	return (~data) & mask;
}
6492 
/*
 * Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, total active
 * CU count, and the always-on CU mask (first two active CUs of each SH).
 * Applies any module-parameter CU disable masks for the first 4 SEs x 2
 * SHs.  Iterates under grbm_idx_mutex since it reprograms GRBM SE/SH
 * selection.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	/* parse the amdgpu.disable_cu module parameter (4 SEs x 2 SHs) */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first two become always-on */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast SE/SH selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6535