xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 0883c2c0)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "clearstate_vi.h"
32 
33 #include "gmc/gmc_8_2_d.h"
34 #include "gmc/gmc_8_2_sh_mask.h"
35 
36 #include "oss/oss_3_0_d.h"
37 #include "oss/oss_3_0_sh_mask.h"
38 
39 #include "bif/bif_5_0_d.h"
40 #include "bif/bif_5_0_sh_mask.h"
41 
42 #include "gca/gfx_8_0_d.h"
43 #include "gca/gfx_8_0_enum.h"
44 #include "gca/gfx_8_0_sh_mask.h"
45 #include "gca/gfx_8_0_enum.h"
46 
47 #include "dce/dce_10_0_d.h"
48 #include "dce/dce_10_0_sh_mask.h"
49 
/* Ring counts instantiated by this GFX v8 IP block. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Per-ASIC "golden" (hw-team validated) GB_ADDR_CONFIG values. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Helpers that shift a field value into position for the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layouts.
 */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* RLC_CGTT_MGCG_OVERRIDE bit masks (per-domain clock-gating overrides). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES command opcodes (set vs. clear). */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM register addresses */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* number of BPM registers */
};

/* Entry count of the RLC "format direct" register list — presumably
 * dictated by the RLC firmware interface; TODO confirm against RLC spec. */
#define RLC_FormatDirectRegListLength        14
90 
/*
 * Firmware images required per ASIC (CE/PFP/ME/MEC[2]/RLC microcode);
 * declared so userspace tooling can bundle them with the module.
 * Note: Stoney and Topaz ship no mec2 image.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
137 
/*
 * GDS register offsets per VMID: one {BASE, SIZE, GWS, OA} tuple for
 * each of the 16 VMIDs, indexable by VMID number.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
157 
/*
 * Tonga "golden" register tables. Each row is a triplet
 * {register offset, bit mask, value}, consumed by
 * amdgpu_program_register_sequence() in gfx_v8_0_init_golden_registers().
 * NOTE(review): mask presumably selects the bits updated — confirm
 * against amdgpu_program_register_sequence().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Tonga raster/addressing config and SPI CU resource reservations. */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Tonga medium-grain / coarse-grain clock-gating init sequence. */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU (0-7) clock-gating control registers */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
267 
/*
 * Polaris11 "golden" register settings; triplets of
 * {register offset, bit mask, value} for
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
};

/* Polaris11 addressing config and SPI CU resource reservations. */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Polaris10 "golden" register settings. */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
};

/* Polaris10 raster/addressing config and SPI CU resource reservations. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
327 
/*
 * Fiji "golden" register tables; triplets of
 * {register offset, bit mask, value} for
 * amdgpu_program_register_sequence().
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

/* Fiji workaround settings. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Fiji medium-grain / coarse-grain clock-gating init sequence. */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
395 
/*
 * Iceland (Topaz) "golden" register tables; triplets of
 * {register offset, bit mask, value} for
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

/* Iceland raster/addressing config and SPI CU resource reservations. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Iceland clock-gating init sequence (6 CUs on this part). */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU (0-5) clock-gating control registers */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
494 
/*
 * Carrizo "golden" register tables; triplets of
 * {register offset, bit mask, value} for
 * amdgpu_program_register_sequence().
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

/* Carrizo raster/addressing config and SPI CU resource reservations. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Carrizo clock-gating init sequence (8 CUs). */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU (0-7) clock-gating control registers */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
599 
/*
 * Stoney "golden" register tables; triplets of
 * {register offset, bit mask, value} for
 * amdgpu_program_register_sequence().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

/* Stoney raster/addressing config and SPI CU resource reservations. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Stoney clock-gating init sequence (much shorter than other ASICs). */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
635 
/* Forward declarations — implementations appear later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
642 
643 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
644 {
645 	switch (adev->asic_type) {
646 	case CHIP_TOPAZ:
647 		amdgpu_program_register_sequence(adev,
648 						 iceland_mgcg_cgcg_init,
649 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
650 		amdgpu_program_register_sequence(adev,
651 						 golden_settings_iceland_a11,
652 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
653 		amdgpu_program_register_sequence(adev,
654 						 iceland_golden_common_all,
655 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
656 		break;
657 	case CHIP_FIJI:
658 		amdgpu_program_register_sequence(adev,
659 						 fiji_mgcg_cgcg_init,
660 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
661 		amdgpu_program_register_sequence(adev,
662 						 golden_settings_fiji_a10,
663 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
664 		amdgpu_program_register_sequence(adev,
665 						 fiji_golden_common_all,
666 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
667 		break;
668 
669 	case CHIP_TONGA:
670 		amdgpu_program_register_sequence(adev,
671 						 tonga_mgcg_cgcg_init,
672 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
673 		amdgpu_program_register_sequence(adev,
674 						 golden_settings_tonga_a11,
675 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
676 		amdgpu_program_register_sequence(adev,
677 						 tonga_golden_common_all,
678 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
679 		break;
680 	case CHIP_POLARIS11:
681 		amdgpu_program_register_sequence(adev,
682 						 golden_settings_polaris11_a11,
683 						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
684 		amdgpu_program_register_sequence(adev,
685 						 polaris11_golden_common_all,
686 						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
687 		break;
688 	case CHIP_POLARIS10:
689 		amdgpu_program_register_sequence(adev,
690 						 golden_settings_polaris10_a11,
691 						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
692 		amdgpu_program_register_sequence(adev,
693 						 polaris10_golden_common_all,
694 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
695 		break;
696 	case CHIP_CARRIZO:
697 		amdgpu_program_register_sequence(adev,
698 						 cz_mgcg_cgcg_init,
699 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
700 		amdgpu_program_register_sequence(adev,
701 						 cz_golden_settings_a11,
702 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
703 		amdgpu_program_register_sequence(adev,
704 						 cz_golden_common_all,
705 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
706 		break;
707 	case CHIP_STONEY:
708 		amdgpu_program_register_sequence(adev,
709 						 stoney_mgcg_cgcg_init,
710 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
711 		amdgpu_program_register_sequence(adev,
712 						 stoney_golden_settings_a11,
713 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
714 		amdgpu_program_register_sequence(adev,
715 						 stoney_golden_common_all,
716 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
717 		break;
718 	default:
719 		break;
720 	}
721 }
722 
723 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
724 {
725 	int i;
726 
727 	adev->gfx.scratch.num_reg = 7;
728 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
729 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
730 		adev->gfx.scratch.free[i] = true;
731 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
732 	}
733 }
734 
/*
 * gfx_v8_0_ring_test_ring - basic liveness test for a CP ring.
 * @ring: ring to test
 *
 * Seeds a scratch register with 0xCAFEDEAD, then emits a three-dword
 * SET_UCONFIG_REG packet on @ring that writes 0xDEADBEEF to that same
 * register.  Polls the register for up to adev->usec_timeout usecs.
 *
 * Returns 0 if the write landed (ring is alive), -EINVAL on timeout,
 * or a negative error code from the scratch/ring helpers.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* known sentinel so a stale read can't be mistaken for success */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* one-register uconfig write: header, reg offset, value */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP to execute the packet */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
778 
779 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
780 {
781 	struct amdgpu_device *adev = ring->adev;
782 	struct amdgpu_ib ib;
783 	struct fence *f = NULL;
784 	uint32_t scratch;
785 	uint32_t tmp = 0;
786 	unsigned i;
787 	int r;
788 
789 	r = amdgpu_gfx_scratch_get(adev, &scratch);
790 	if (r) {
791 		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
792 		return r;
793 	}
794 	WREG32(scratch, 0xCAFEDEAD);
795 	memset(&ib, 0, sizeof(ib));
796 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
797 	if (r) {
798 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
799 		goto err1;
800 	}
801 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
802 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
803 	ib.ptr[2] = 0xDEADBEEF;
804 	ib.length_dw = 3;
805 
806 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
807 	if (r)
808 		goto err2;
809 
810 	r = fence_wait(f, false);
811 	if (r) {
812 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
813 		goto err2;
814 	}
815 	for (i = 0; i < adev->usec_timeout; i++) {
816 		tmp = RREG32(scratch);
817 		if (tmp == 0xDEADBEEF)
818 			break;
819 		DRM_UDELAY(1);
820 	}
821 	if (i < adev->usec_timeout) {
822 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
823 			 ring->idx, i);
824 		goto err2;
825 	} else {
826 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
827 			  scratch, tmp);
828 		r = -EINVAL;
829 	}
830 err2:
831 	fence_put(f);
832 	amdgpu_ib_free(adev, &ib, NULL);
833 	fence_put(f);
834 err1:
835 	amdgpu_gfx_scratch_free(adev, scratch);
836 	return r;
837 }
838 
839 
840 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
841 	release_firmware(adev->gfx.pfp_fw);
842 	adev->gfx.pfp_fw = NULL;
843 	release_firmware(adev->gfx.me_fw);
844 	adev->gfx.me_fw = NULL;
845 	release_firmware(adev->gfx.ce_fw);
846 	adev->gfx.ce_fw = NULL;
847 	release_firmware(adev->gfx.rlc_fw);
848 	adev->gfx.rlc_fw = NULL;
849 	release_firmware(adev->gfx.mec_fw);
850 	adev->gfx.mec_fw = NULL;
851 	if ((adev->asic_type != CHIP_STONEY) &&
852 	    (adev->asic_type != CHIP_TOPAZ))
853 		release_firmware(adev->gfx.mec2_fw);
854 	adev->gfx.mec2_fw = NULL;
855 
856 	kfree(adev->gfx.rlc.register_list_format);
857 }
858 
859 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
860 {
861 	const char *chip_name;
862 	char fw_name[30];
863 	int err;
864 	struct amdgpu_firmware_info *info = NULL;
865 	const struct common_firmware_header *header = NULL;
866 	const struct gfx_firmware_header_v1_0 *cp_hdr;
867 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
868 	unsigned int *tmp = NULL, i;
869 
870 	DRM_DEBUG("\n");
871 
872 	switch (adev->asic_type) {
873 	case CHIP_TOPAZ:
874 		chip_name = "topaz";
875 		break;
876 	case CHIP_TONGA:
877 		chip_name = "tonga";
878 		break;
879 	case CHIP_CARRIZO:
880 		chip_name = "carrizo";
881 		break;
882 	case CHIP_FIJI:
883 		chip_name = "fiji";
884 		break;
885 	case CHIP_POLARIS11:
886 		chip_name = "polaris11";
887 		break;
888 	case CHIP_POLARIS10:
889 		chip_name = "polaris10";
890 		break;
891 	case CHIP_STONEY:
892 		chip_name = "stoney";
893 		break;
894 	default:
895 		BUG();
896 	}
897 
898 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
899 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
900 	if (err)
901 		goto out;
902 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
903 	if (err)
904 		goto out;
905 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
906 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
907 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
908 
909 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
910 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
911 	if (err)
912 		goto out;
913 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
914 	if (err)
915 		goto out;
916 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
917 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
918 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
919 
920 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
921 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
922 	if (err)
923 		goto out;
924 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
925 	if (err)
926 		goto out;
927 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
928 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
929 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
930 
931 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
932 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
933 	if (err)
934 		goto out;
935 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
936 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
937 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
938 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
939 
940 	adev->gfx.rlc.save_and_restore_offset =
941 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
942 	adev->gfx.rlc.clear_state_descriptor_offset =
943 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
944 	adev->gfx.rlc.avail_scratch_ram_locations =
945 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
946 	adev->gfx.rlc.reg_restore_list_size =
947 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
948 	adev->gfx.rlc.reg_list_format_start =
949 			le32_to_cpu(rlc_hdr->reg_list_format_start);
950 	adev->gfx.rlc.reg_list_format_separate_start =
951 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
952 	adev->gfx.rlc.starting_offsets_start =
953 			le32_to_cpu(rlc_hdr->starting_offsets_start);
954 	adev->gfx.rlc.reg_list_format_size_bytes =
955 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
956 	adev->gfx.rlc.reg_list_size_bytes =
957 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
958 
959 	adev->gfx.rlc.register_list_format =
960 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
961 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
962 
963 	if (!adev->gfx.rlc.register_list_format) {
964 		err = -ENOMEM;
965 		goto out;
966 	}
967 
968 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
969 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
970 	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
971 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
972 
973 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
974 
975 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
976 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
977 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
978 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
979 
980 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
981 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
982 	if (err)
983 		goto out;
984 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
985 	if (err)
986 		goto out;
987 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
988 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
989 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
990 
991 	if ((adev->asic_type != CHIP_STONEY) &&
992 	    (adev->asic_type != CHIP_TOPAZ)) {
993 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
994 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
995 		if (!err) {
996 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
997 			if (err)
998 				goto out;
999 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1000 				adev->gfx.mec2_fw->data;
1001 			adev->gfx.mec2_fw_version =
1002 				le32_to_cpu(cp_hdr->header.ucode_version);
1003 			adev->gfx.mec2_feature_version =
1004 				le32_to_cpu(cp_hdr->ucode_feature_version);
1005 		} else {
1006 			err = 0;
1007 			adev->gfx.mec2_fw = NULL;
1008 		}
1009 	}
1010 
1011 	if (adev->firmware.smu_load) {
1012 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1013 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1014 		info->fw = adev->gfx.pfp_fw;
1015 		header = (const struct common_firmware_header *)info->fw->data;
1016 		adev->firmware.fw_size +=
1017 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1018 
1019 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1020 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1021 		info->fw = adev->gfx.me_fw;
1022 		header = (const struct common_firmware_header *)info->fw->data;
1023 		adev->firmware.fw_size +=
1024 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1025 
1026 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1027 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1028 		info->fw = adev->gfx.ce_fw;
1029 		header = (const struct common_firmware_header *)info->fw->data;
1030 		adev->firmware.fw_size +=
1031 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1032 
1033 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1034 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1035 		info->fw = adev->gfx.rlc_fw;
1036 		header = (const struct common_firmware_header *)info->fw->data;
1037 		adev->firmware.fw_size +=
1038 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1039 
1040 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1041 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1042 		info->fw = adev->gfx.mec_fw;
1043 		header = (const struct common_firmware_header *)info->fw->data;
1044 		adev->firmware.fw_size +=
1045 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1046 
1047 		if (adev->gfx.mec2_fw) {
1048 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1049 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1050 			info->fw = adev->gfx.mec2_fw;
1051 			header = (const struct common_firmware_header *)info->fw->data;
1052 			adev->firmware.fw_size +=
1053 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1054 		}
1055 
1056 	}
1057 
1058 out:
1059 	if (err) {
1060 		dev_err(adev->dev,
1061 			"gfx8: Failed to load firmware \"%s\"\n",
1062 			fw_name);
1063 		release_firmware(adev->gfx.pfp_fw);
1064 		adev->gfx.pfp_fw = NULL;
1065 		release_firmware(adev->gfx.me_fw);
1066 		adev->gfx.me_fw = NULL;
1067 		release_firmware(adev->gfx.ce_fw);
1068 		adev->gfx.ce_fw = NULL;
1069 		release_firmware(adev->gfx.rlc_fw);
1070 		adev->gfx.rlc_fw = NULL;
1071 		release_firmware(adev->gfx.mec_fw);
1072 		adev->gfx.mec_fw = NULL;
1073 		release_firmware(adev->gfx.mec2_fw);
1074 		adev->gfx.mec2_fw = NULL;
1075 	}
1076 	return err;
1077 }
1078 
/*
 * gfx_v8_0_get_csb_buffer - serialize the RLC clear state into a buffer.
 * @adev: amdgpu device
 * @buffer: kmapped destination; filled with little-endian PM4 dwords
 *
 * Emits, in order: PREAMBLE begin-clear-state, CONTEXT_CONTROL, one
 * SET_CONTEXT_REG packet per SECT_CONTEXT extent of
 * adev->gfx.rlc.cs_data, the per-ASIC PA_SC_RASTER_CONFIG golden pair,
 * PREAMBLE end-clear-state and a final CLEAR_STATE packet.  Bails out
 * silently if cs_data or @buffer is NULL, or on the first non-context
 * section (only SECT_CONTEXT is supported here).
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* dump every context-register extent as one SET_CONTEXT_REG packet */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-ASIC values for PA_SC_RASTER_CONFIG and the register that
	 * follows it (two consecutive registers in one packet)
	 */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1151 
1152 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1153 {
1154 	int r;
1155 
1156 	/* clear state block */
1157 	if (adev->gfx.rlc.clear_state_obj) {
1158 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1159 		if (unlikely(r != 0))
1160 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1161 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1162 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1163 
1164 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1165 		adev->gfx.rlc.clear_state_obj = NULL;
1166 	}
1167 }
1168 
/*
 * gfx_v8_0_rlc_init - create and fill the RLC clear-state buffer.
 *
 * Points adev->gfx.rlc.cs_data at the VI clear-state tables, sizes the
 * buffer via gfx_v8_0_get_csb_size(), allocates a CPU-accessible VRAM
 * BO for it (only once), pins and kmaps it, and serializes the clear
 * state into it with gfx_v8_0_get_csb_buffer().  Any failure tears the
 * partially-initialized state back down via gfx_v8_0_rlc_fini().
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* pin in VRAM so the RLC can fetch it by GPU address */
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	return 0;
}
1225 
1226 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1227 {
1228 	int r;
1229 
1230 	if (adev->gfx.mec.hpd_eop_obj) {
1231 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1232 		if (unlikely(r != 0))
1233 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1234 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1235 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1236 
1237 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1238 		adev->gfx.mec.hpd_eop_obj = NULL;
1239 	}
1240 }
1241 
1242 #define MEC_HPD_SIZE 2048
1243 
/*
 * gfx_v8_0_mec_init - set up the MEC (compute microengine) bookkeeping
 * and its HPD EOP buffer.
 *
 * Configures one MEC with one pipe (remaining pipes are handled by
 * KFD, per the comment below) and 8 queues per pipe, then allocates,
 * pins, maps and zeroes a GTT buffer of
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes for the hardware queue
 * descriptors / EOP data.  On failure the partially-built state is
 * torn down via gfx_v8_0_mec_fini().
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	/* start the queues from a clean slate */
	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1295 
/*
 * Pre-assembled GFX8 compute shader dwords, copied verbatim into the
 * indirect buffer by gfx_v8_0_do_edc_gpr_workarounds() (at vgpr_offset)
 * and dispatched to initialize the VGPRs for EDC.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

/*
 * Companion shader dwords, copied into the IB at sgpr_offset and
 * dispatched twice (SGPR1/SGPR2 passes) to initialize the SGPRs for
 * EDC.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1357 
/*
 * Register/value pairs written via SET_SH_REG packets before the VGPR
 * initialization dispatch in gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/*
 * Register/value pairs for the first SGPR initialization dispatch
 * (thread management mask 0x0f selects the first group of CUs —
 * presumably; confirm against the hardware docs).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/*
 * Register/value pairs for the second SGPR initialization dispatch
 * (thread management mask 0xf0 — the complementary group).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/*
 * EDC SEC/DED counter registers; read back (and thereby cleared, see
 * gfx_v8_0_do_edc_gpr_workarounds()) after the init dispatches.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1446 
1447 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1448 {
1449 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1450 	struct amdgpu_ib ib;
1451 	struct fence *f = NULL;
1452 	int r, i;
1453 	u32 tmp;
1454 	unsigned total_size, vgpr_offset, sgpr_offset;
1455 	u64 gpu_addr;
1456 
1457 	/* only supported on CZ */
1458 	if (adev->asic_type != CHIP_CARRIZO)
1459 		return 0;
1460 
1461 	/* bail if the compute ring is not ready */
1462 	if (!ring->ready)
1463 		return 0;
1464 
1465 	tmp = RREG32(mmGB_EDC_MODE);
1466 	WREG32(mmGB_EDC_MODE, 0);
1467 
1468 	total_size =
1469 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1470 	total_size +=
1471 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1472 	total_size +=
1473 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1474 	total_size = ALIGN(total_size, 256);
1475 	vgpr_offset = total_size;
1476 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1477 	sgpr_offset = total_size;
1478 	total_size += sizeof(sgpr_init_compute_shader);
1479 
1480 	/* allocate an indirect buffer to put the commands in */
1481 	memset(&ib, 0, sizeof(ib));
1482 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1483 	if (r) {
1484 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1485 		return r;
1486 	}
1487 
1488 	/* load the compute shaders */
1489 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1490 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1491 
1492 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1493 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1494 
1495 	/* init the ib length to 0 */
1496 	ib.length_dw = 0;
1497 
1498 	/* VGPR */
1499 	/* write the register state for the compute dispatch */
1500 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1501 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1502 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1503 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1504 	}
1505 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1506 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1507 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1508 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1509 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1510 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1511 
1512 	/* write dispatch packet */
1513 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1514 	ib.ptr[ib.length_dw++] = 8; /* x */
1515 	ib.ptr[ib.length_dw++] = 1; /* y */
1516 	ib.ptr[ib.length_dw++] = 1; /* z */
1517 	ib.ptr[ib.length_dw++] =
1518 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1519 
1520 	/* write CS partial flush packet */
1521 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1522 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1523 
1524 	/* SGPR1 */
1525 	/* write the register state for the compute dispatch */
1526 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1527 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1528 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1529 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1530 	}
1531 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1532 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1533 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1534 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1535 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1536 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1537 
1538 	/* write dispatch packet */
1539 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1540 	ib.ptr[ib.length_dw++] = 8; /* x */
1541 	ib.ptr[ib.length_dw++] = 1; /* y */
1542 	ib.ptr[ib.length_dw++] = 1; /* z */
1543 	ib.ptr[ib.length_dw++] =
1544 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1545 
1546 	/* write CS partial flush packet */
1547 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1548 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1549 
1550 	/* SGPR2 */
1551 	/* write the register state for the compute dispatch */
1552 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1553 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1554 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1555 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1556 	}
1557 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1558 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1559 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1560 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1561 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1562 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1563 
1564 	/* write dispatch packet */
1565 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1566 	ib.ptr[ib.length_dw++] = 8; /* x */
1567 	ib.ptr[ib.length_dw++] = 1; /* y */
1568 	ib.ptr[ib.length_dw++] = 1; /* z */
1569 	ib.ptr[ib.length_dw++] =
1570 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1571 
1572 	/* write CS partial flush packet */
1573 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1574 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1575 
1576 	/* shedule the ib on the ring */
1577 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1578 	if (r) {
1579 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1580 		goto fail;
1581 	}
1582 
1583 	/* wait for the GPU to finish processing the IB */
1584 	r = fence_wait(f, false);
1585 	if (r) {
1586 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1587 		goto fail;
1588 	}
1589 
1590 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1591 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1592 	WREG32(mmGB_EDC_MODE, tmp);
1593 
1594 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1595 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1596 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1597 
1598 
1599 	/* read back registers to clear the counters */
1600 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1601 		RREG32(sec_ded_counter_registers[i]);
1602 
1603 fail:
1604 	fence_put(f);
1605 	amdgpu_ib_free(adev, &ib, NULL);
1606 	fence_put(f);
1607 
1608 	return r;
1609 }
1610 
1611 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1612 {
1613 	u32 gb_addr_config;
1614 	u32 mc_shared_chmap, mc_arb_ramcfg;
1615 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1616 	u32 tmp;
1617 	int ret;
1618 
1619 	switch (adev->asic_type) {
1620 	case CHIP_TOPAZ:
1621 		adev->gfx.config.max_shader_engines = 1;
1622 		adev->gfx.config.max_tile_pipes = 2;
1623 		adev->gfx.config.max_cu_per_sh = 6;
1624 		adev->gfx.config.max_sh_per_se = 1;
1625 		adev->gfx.config.max_backends_per_se = 2;
1626 		adev->gfx.config.max_texture_channel_caches = 2;
1627 		adev->gfx.config.max_gprs = 256;
1628 		adev->gfx.config.max_gs_threads = 32;
1629 		adev->gfx.config.max_hw_contexts = 8;
1630 
1631 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1632 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1633 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1634 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1635 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1636 		break;
1637 	case CHIP_FIJI:
1638 		adev->gfx.config.max_shader_engines = 4;
1639 		adev->gfx.config.max_tile_pipes = 16;
1640 		adev->gfx.config.max_cu_per_sh = 16;
1641 		adev->gfx.config.max_sh_per_se = 1;
1642 		adev->gfx.config.max_backends_per_se = 4;
1643 		adev->gfx.config.max_texture_channel_caches = 16;
1644 		adev->gfx.config.max_gprs = 256;
1645 		adev->gfx.config.max_gs_threads = 32;
1646 		adev->gfx.config.max_hw_contexts = 8;
1647 
1648 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1649 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1650 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1651 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1652 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1653 		break;
1654 	case CHIP_POLARIS11:
1655 		ret = amdgpu_atombios_get_gfx_info(adev);
1656 		if (ret)
1657 			return ret;
1658 		adev->gfx.config.max_gprs = 256;
1659 		adev->gfx.config.max_gs_threads = 32;
1660 		adev->gfx.config.max_hw_contexts = 8;
1661 
1662 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1663 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1664 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1665 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1666 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1667 		break;
1668 	case CHIP_POLARIS10:
1669 		ret = amdgpu_atombios_get_gfx_info(adev);
1670 		if (ret)
1671 			return ret;
1672 		adev->gfx.config.max_gprs = 256;
1673 		adev->gfx.config.max_gs_threads = 32;
1674 		adev->gfx.config.max_hw_contexts = 8;
1675 
1676 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1677 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1678 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1679 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1680 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1681 		break;
1682 	case CHIP_TONGA:
1683 		adev->gfx.config.max_shader_engines = 4;
1684 		adev->gfx.config.max_tile_pipes = 8;
1685 		adev->gfx.config.max_cu_per_sh = 8;
1686 		adev->gfx.config.max_sh_per_se = 1;
1687 		adev->gfx.config.max_backends_per_se = 2;
1688 		adev->gfx.config.max_texture_channel_caches = 8;
1689 		adev->gfx.config.max_gprs = 256;
1690 		adev->gfx.config.max_gs_threads = 32;
1691 		adev->gfx.config.max_hw_contexts = 8;
1692 
1693 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1694 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1695 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1696 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1697 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1698 		break;
1699 	case CHIP_CARRIZO:
1700 		adev->gfx.config.max_shader_engines = 1;
1701 		adev->gfx.config.max_tile_pipes = 2;
1702 		adev->gfx.config.max_sh_per_se = 1;
1703 		adev->gfx.config.max_backends_per_se = 2;
1704 
1705 		switch (adev->pdev->revision) {
1706 		case 0xc4:
1707 		case 0x84:
1708 		case 0xc8:
1709 		case 0xcc:
1710 		case 0xe1:
1711 		case 0xe3:
1712 			/* B10 */
1713 			adev->gfx.config.max_cu_per_sh = 8;
1714 			break;
1715 		case 0xc5:
1716 		case 0x81:
1717 		case 0x85:
1718 		case 0xc9:
1719 		case 0xcd:
1720 		case 0xe2:
1721 		case 0xe4:
1722 			/* B8 */
1723 			adev->gfx.config.max_cu_per_sh = 6;
1724 			break;
1725 		case 0xc6:
1726 		case 0xca:
1727 		case 0xce:
1728 		case 0x88:
1729 			/* B6 */
1730 			adev->gfx.config.max_cu_per_sh = 6;
1731 			break;
1732 		case 0xc7:
1733 		case 0x87:
1734 		case 0xcb:
1735 		case 0xe5:
1736 		case 0x89:
1737 		default:
1738 			/* B4 */
1739 			adev->gfx.config.max_cu_per_sh = 4;
1740 			break;
1741 		}
1742 
1743 		adev->gfx.config.max_texture_channel_caches = 2;
1744 		adev->gfx.config.max_gprs = 256;
1745 		adev->gfx.config.max_gs_threads = 32;
1746 		adev->gfx.config.max_hw_contexts = 8;
1747 
1748 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1749 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1750 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1751 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1752 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1753 		break;
1754 	case CHIP_STONEY:
1755 		adev->gfx.config.max_shader_engines = 1;
1756 		adev->gfx.config.max_tile_pipes = 2;
1757 		adev->gfx.config.max_sh_per_se = 1;
1758 		adev->gfx.config.max_backends_per_se = 1;
1759 
1760 		switch (adev->pdev->revision) {
1761 		case 0xc0:
1762 		case 0xc1:
1763 		case 0xc2:
1764 		case 0xc4:
1765 		case 0xc8:
1766 		case 0xc9:
1767 			adev->gfx.config.max_cu_per_sh = 3;
1768 			break;
1769 		case 0xd0:
1770 		case 0xd1:
1771 		case 0xd2:
1772 		default:
1773 			adev->gfx.config.max_cu_per_sh = 2;
1774 			break;
1775 		}
1776 
1777 		adev->gfx.config.max_texture_channel_caches = 2;
1778 		adev->gfx.config.max_gprs = 256;
1779 		adev->gfx.config.max_gs_threads = 16;
1780 		adev->gfx.config.max_hw_contexts = 8;
1781 
1782 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1783 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1784 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1785 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1786 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1787 		break;
1788 	default:
1789 		adev->gfx.config.max_shader_engines = 2;
1790 		adev->gfx.config.max_tile_pipes = 4;
1791 		adev->gfx.config.max_cu_per_sh = 2;
1792 		adev->gfx.config.max_sh_per_se = 1;
1793 		adev->gfx.config.max_backends_per_se = 2;
1794 		adev->gfx.config.max_texture_channel_caches = 4;
1795 		adev->gfx.config.max_gprs = 256;
1796 		adev->gfx.config.max_gs_threads = 32;
1797 		adev->gfx.config.max_hw_contexts = 8;
1798 
1799 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1800 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1801 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1802 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1803 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1804 		break;
1805 	}
1806 
1807 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1808 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1809 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1810 
1811 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1812 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1813 	if (adev->flags & AMD_IS_APU) {
1814 		/* Get memory bank mapping mode. */
1815 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1816 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1817 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1818 
1819 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1820 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1821 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1822 
1823 		/* Validate settings in case only one DIMM installed. */
1824 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1825 			dimm00_addr_map = 0;
1826 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1827 			dimm01_addr_map = 0;
1828 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1829 			dimm10_addr_map = 0;
1830 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1831 			dimm11_addr_map = 0;
1832 
1833 		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1834 		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1835 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1836 			adev->gfx.config.mem_row_size_in_kb = 2;
1837 		else
1838 			adev->gfx.config.mem_row_size_in_kb = 1;
1839 	} else {
1840 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1841 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1842 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1843 			adev->gfx.config.mem_row_size_in_kb = 4;
1844 	}
1845 
1846 	adev->gfx.config.shader_engine_tile_size = 32;
1847 	adev->gfx.config.num_gpus = 1;
1848 	adev->gfx.config.multi_gpu_tile_size = 64;
1849 
1850 	/* fix up row size */
1851 	switch (adev->gfx.config.mem_row_size_in_kb) {
1852 	case 1:
1853 	default:
1854 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1855 		break;
1856 	case 2:
1857 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1858 		break;
1859 	case 4:
1860 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1861 		break;
1862 	}
1863 	adev->gfx.config.gb_addr_config = gb_addr_config;
1864 
1865 	return 0;
1866 }
1867 
/**
 * gfx_v8_0_sw_init - software-side setup for the gfx v8 IP block
 * @handle: amdgpu device pointer (passed as void * by the IP block framework)
 *
 * Registers the gfx interrupt sources, loads microcode, allocates the
 * RLC and MEC buffer objects, initializes the gfx and compute rings,
 * and reserves the GDS/GWS/OA partitions.  The steps are order-dependent
 * (e.g. rings need the EOP irq source registered first); on any failure
 * the error is returned and teardown is left to the framework/sw_fini.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event (irq src id 181 — presumably the VI CP end-of-pipe
	 * interrupt; confirm against the interrupt source headers) */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	/* Firmware must be loaded before the RLC/MEC BOs are set up. */
	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* 8 queues per pipe: ring i maps to pipe i/8, queue i%8 */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	/* 32KB of constant-engine RAM */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
1987 
/**
 * gfx_v8_0_sw_fini - software-side teardown for the gfx v8 IP block
 * @handle: amdgpu device pointer (passed as void * by the IP block framework)
 *
 * Releases everything allocated in gfx_v8_0_sw_init(), in roughly the
 * reverse order of allocation: the GDS/GWS/OA buffer objects, the gfx
 * and compute rings, then the MEC and RLC buffers and the firmware.
 *
 * Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* drop the GDS/GWS/OA reservations made in sw_init */
	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	gfx_v8_0_mec_fini(adev);

	gfx_v8_0_rlc_fini(adev);

	/* release the firmware images loaded by gfx_v8_0_init_microcode() */
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2010 
2011 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2012 {
2013 	uint32_t *modearray, *mod2array;
2014 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2015 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2016 	u32 reg_offset;
2017 
2018 	modearray = adev->gfx.config.tile_mode_array;
2019 	mod2array = adev->gfx.config.macrotile_mode_array;
2020 
2021 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2022 		modearray[reg_offset] = 0;
2023 
2024 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2025 		mod2array[reg_offset] = 0;
2026 
2027 	switch (adev->asic_type) {
2028 	case CHIP_TOPAZ:
2029 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2030 				PIPE_CONFIG(ADDR_SURF_P2) |
2031 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2032 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2033 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034 				PIPE_CONFIG(ADDR_SURF_P2) |
2035 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2036 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2037 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2038 				PIPE_CONFIG(ADDR_SURF_P2) |
2039 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2040 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2041 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042 				PIPE_CONFIG(ADDR_SURF_P2) |
2043 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2044 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2045 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046 				PIPE_CONFIG(ADDR_SURF_P2) |
2047 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2048 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2049 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2050 				PIPE_CONFIG(ADDR_SURF_P2) |
2051 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2052 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2053 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2054 				PIPE_CONFIG(ADDR_SURF_P2) |
2055 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2056 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2057 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2058 				PIPE_CONFIG(ADDR_SURF_P2));
2059 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2060 				PIPE_CONFIG(ADDR_SURF_P2) |
2061 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2063 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2064 				 PIPE_CONFIG(ADDR_SURF_P2) |
2065 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2066 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068 				 PIPE_CONFIG(ADDR_SURF_P2) |
2069 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2070 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2071 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2072 				 PIPE_CONFIG(ADDR_SURF_P2) |
2073 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2074 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2075 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2076 				 PIPE_CONFIG(ADDR_SURF_P2) |
2077 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2080 				 PIPE_CONFIG(ADDR_SURF_P2) |
2081 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2082 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084 				 PIPE_CONFIG(ADDR_SURF_P2) |
2085 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2086 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2087 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2088 				 PIPE_CONFIG(ADDR_SURF_P2) |
2089 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2091 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2092 				 PIPE_CONFIG(ADDR_SURF_P2) |
2093 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2094 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2095 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2096 				 PIPE_CONFIG(ADDR_SURF_P2) |
2097 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2098 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2099 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2100 				 PIPE_CONFIG(ADDR_SURF_P2) |
2101 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2102 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2103 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2104 				 PIPE_CONFIG(ADDR_SURF_P2) |
2105 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2106 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2107 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2108 				 PIPE_CONFIG(ADDR_SURF_P2) |
2109 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2110 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2111 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2112 				 PIPE_CONFIG(ADDR_SURF_P2) |
2113 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2114 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2115 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2116 				 PIPE_CONFIG(ADDR_SURF_P2) |
2117 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2118 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2119 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2120 				 PIPE_CONFIG(ADDR_SURF_P2) |
2121 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2122 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2123 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124 				 PIPE_CONFIG(ADDR_SURF_P2) |
2125 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2126 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2127 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2128 				 PIPE_CONFIG(ADDR_SURF_P2) |
2129 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2130 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2131 
2132 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2133 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2134 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2135 				NUM_BANKS(ADDR_SURF_8_BANK));
2136 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2137 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2138 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2139 				NUM_BANKS(ADDR_SURF_8_BANK));
2140 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2141 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2142 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2143 				NUM_BANKS(ADDR_SURF_8_BANK));
2144 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2145 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2146 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2147 				NUM_BANKS(ADDR_SURF_8_BANK));
2148 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2149 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2150 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2151 				NUM_BANKS(ADDR_SURF_8_BANK));
2152 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2154 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2155 				NUM_BANKS(ADDR_SURF_8_BANK));
2156 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2158 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2159 				NUM_BANKS(ADDR_SURF_8_BANK));
2160 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2161 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2162 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2163 				NUM_BANKS(ADDR_SURF_16_BANK));
2164 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2165 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2166 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2167 				NUM_BANKS(ADDR_SURF_16_BANK));
2168 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2169 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2170 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2171 				 NUM_BANKS(ADDR_SURF_16_BANK));
2172 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2173 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2174 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2175 				 NUM_BANKS(ADDR_SURF_16_BANK));
2176 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2178 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2179 				 NUM_BANKS(ADDR_SURF_16_BANK));
2180 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2181 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2182 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2183 				 NUM_BANKS(ADDR_SURF_16_BANK));
2184 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2186 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2187 				 NUM_BANKS(ADDR_SURF_8_BANK));
2188 
2189 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2190 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2191 			    reg_offset != 23)
2192 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2193 
2194 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2195 			if (reg_offset != 7)
2196 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2197 
2198 		break;
2199 	case CHIP_FIJI:
2200 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2201 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2202 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2203 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2204 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2206 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2207 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2208 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2209 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2210 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2211 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2212 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2214 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2215 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2216 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2218 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2219 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2220 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2221 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2222 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2223 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2224 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2225 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2226 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2227 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2229 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2230 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2231 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2233 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2234 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2235 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2236 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2237 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2238 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2239 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2240 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2241 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2243 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2244 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2245 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2246 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2248 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2249 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2251 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2252 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2253 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2254 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2255 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2257 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2259 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2261 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2263 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2264 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2265 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2266 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2268 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2271 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2272 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2274 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2275 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2277 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2278 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2279 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2281 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2282 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2283 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2285 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2287 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2289 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2291 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2292 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2295 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2297 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2299 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2303 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2307 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2309 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2313 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2314 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2315 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2317 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2318 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2320 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2322 
2323 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2324 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2325 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2326 				NUM_BANKS(ADDR_SURF_8_BANK));
2327 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2329 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2330 				NUM_BANKS(ADDR_SURF_8_BANK));
2331 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2334 				NUM_BANKS(ADDR_SURF_8_BANK));
2335 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338 				NUM_BANKS(ADDR_SURF_8_BANK));
2339 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2341 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2342 				NUM_BANKS(ADDR_SURF_8_BANK));
2343 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2346 				NUM_BANKS(ADDR_SURF_8_BANK));
2347 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2350 				NUM_BANKS(ADDR_SURF_8_BANK));
2351 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2353 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 				NUM_BANKS(ADDR_SURF_8_BANK));
2355 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2357 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 				NUM_BANKS(ADDR_SURF_8_BANK));
2359 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2361 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2362 				 NUM_BANKS(ADDR_SURF_8_BANK));
2363 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2365 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2366 				 NUM_BANKS(ADDR_SURF_8_BANK));
2367 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2370 				 NUM_BANKS(ADDR_SURF_8_BANK));
2371 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2373 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374 				 NUM_BANKS(ADDR_SURF_8_BANK));
2375 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2378 				 NUM_BANKS(ADDR_SURF_4_BANK));
2379 
2380 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2381 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2382 
2383 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2384 			if (reg_offset != 7)
2385 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2386 
2387 		break;
	case CHIP_TONGA:
		/*
		 * Tonga tiling tables.  Each modearray[] entry is the packed
		 * value written to GB_TILE_MODE<n>; each mod2array[] entry is
		 * the packed value written to GB_MACROTILE_MODE<n> (see the
		 * write loops at the end of this case).  Tonga uses the
		 * 8-pipe ADDR_SURF_P8_32x32_16x16 pipe config for most modes,
		 * with ADDR_SURF_P4_16x16 for some PRT (partially-resident
		 * texture) modes.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/* Macrotile modes: bank width/height, aspect and bank count. */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		/* mod2array[7] is intentionally not programmed; the write loop
		 * below skips register offset 7 as well. */
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the tables to the GB_TILE_MODE / GB_MACROTILE_MODE
		 * register banks; macrotile offset 7 is never written. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
		/*
		 * Polaris11 tiling tables.  Same layout as the other cases:
		 * modearray[] feeds GB_TILE_MODE<n>, mod2array[] feeds
		 * GB_MACROTILE_MODE<n>.  Polaris11 uses the 4-pipe
		 * ADDR_SURF_P4_16x16 pipe config for every tile mode.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/* Macrotile modes: bank width/height, aspect and bank count. */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		/* mod2array[7] is intentionally not programmed; the write loop
		 * below skips register offset 7 as well. */
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the tables to the GB_TILE_MODE / GB_MACROTILE_MODE
		 * register banks; macrotile offset 7 is never written. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		/*
		 * Polaris10 tiling tables.  Same layout as the other cases:
		 * modearray[] feeds GB_TILE_MODE<n>, mod2array[] feeds
		 * GB_MACROTILE_MODE<n>.  Polaris10 uses the 8-pipe
		 * ADDR_SURF_P8_32x32_16x16 pipe config for most modes, with
		 * ADDR_SURF_P4_16x16 for some PRT modes.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/* Macrotile modes: bank width/height, aspect and bank count. */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		/* mod2array[7] is intentionally not programmed; the write loop
		 * below skips register offset 7 as well. */
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the tables to the GB_TILE_MODE / GB_MACROTILE_MODE
		 * register banks; macrotile offset 7 is never written. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2981 	case CHIP_STONEY:
2982 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983 				PIPE_CONFIG(ADDR_SURF_P2) |
2984 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2985 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2986 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2987 				PIPE_CONFIG(ADDR_SURF_P2) |
2988 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2989 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2990 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2991 				PIPE_CONFIG(ADDR_SURF_P2) |
2992 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2993 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2994 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2995 				PIPE_CONFIG(ADDR_SURF_P2) |
2996 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2997 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2998 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2999 				PIPE_CONFIG(ADDR_SURF_P2) |
3000 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3001 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3002 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003 				PIPE_CONFIG(ADDR_SURF_P2) |
3004 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3005 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3006 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3007 				PIPE_CONFIG(ADDR_SURF_P2) |
3008 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3009 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3010 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3011 				PIPE_CONFIG(ADDR_SURF_P2));
3012 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3013 				PIPE_CONFIG(ADDR_SURF_P2) |
3014 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3016 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3017 				 PIPE_CONFIG(ADDR_SURF_P2) |
3018 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3019 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3020 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3021 				 PIPE_CONFIG(ADDR_SURF_P2) |
3022 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3023 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3024 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3025 				 PIPE_CONFIG(ADDR_SURF_P2) |
3026 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3027 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3028 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3029 				 PIPE_CONFIG(ADDR_SURF_P2) |
3030 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3031 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3033 				 PIPE_CONFIG(ADDR_SURF_P2) |
3034 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3035 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3036 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037 				 PIPE_CONFIG(ADDR_SURF_P2) |
3038 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3039 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3040 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3041 				 PIPE_CONFIG(ADDR_SURF_P2) |
3042 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3043 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3044 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3045 				 PIPE_CONFIG(ADDR_SURF_P2) |
3046 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3047 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3048 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3049 				 PIPE_CONFIG(ADDR_SURF_P2) |
3050 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3051 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3052 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3053 				 PIPE_CONFIG(ADDR_SURF_P2) |
3054 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3055 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3056 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3057 				 PIPE_CONFIG(ADDR_SURF_P2) |
3058 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3059 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3060 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3061 				 PIPE_CONFIG(ADDR_SURF_P2) |
3062 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3063 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3064 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3065 				 PIPE_CONFIG(ADDR_SURF_P2) |
3066 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3067 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3068 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3069 				 PIPE_CONFIG(ADDR_SURF_P2) |
3070 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3071 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3072 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3073 				 PIPE_CONFIG(ADDR_SURF_P2) |
3074 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3075 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3076 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3077 				 PIPE_CONFIG(ADDR_SURF_P2) |
3078 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3079 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3080 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3081 				 PIPE_CONFIG(ADDR_SURF_P2) |
3082 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3083 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3084 
3085 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3087 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3088 				NUM_BANKS(ADDR_SURF_8_BANK));
3089 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3091 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3092 				NUM_BANKS(ADDR_SURF_8_BANK));
3093 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096 				NUM_BANKS(ADDR_SURF_8_BANK));
3097 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3098 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3099 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3100 				NUM_BANKS(ADDR_SURF_8_BANK));
3101 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3103 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3104 				NUM_BANKS(ADDR_SURF_8_BANK));
3105 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108 				NUM_BANKS(ADDR_SURF_8_BANK));
3109 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3110 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3111 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3112 				NUM_BANKS(ADDR_SURF_8_BANK));
3113 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3114 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3115 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3116 				NUM_BANKS(ADDR_SURF_16_BANK));
3117 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3118 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3119 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3120 				NUM_BANKS(ADDR_SURF_16_BANK));
3121 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3122 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3123 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3124 				 NUM_BANKS(ADDR_SURF_16_BANK));
3125 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3126 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3127 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3128 				 NUM_BANKS(ADDR_SURF_16_BANK));
3129 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3130 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3131 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132 				 NUM_BANKS(ADDR_SURF_16_BANK));
3133 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3135 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3136 				 NUM_BANKS(ADDR_SURF_16_BANK));
3137 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3138 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3139 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3140 				 NUM_BANKS(ADDR_SURF_8_BANK));
3141 
3142 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3143 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3144 			    reg_offset != 23)
3145 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3146 
3147 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3148 			if (reg_offset != 7)
3149 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3150 
3151 		break;
3152 	default:
3153 		dev_warn(adev->dev,
3154 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3155 			 adev->asic_type);
3156 
3157 	case CHIP_CARRIZO:
3158 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3159 				PIPE_CONFIG(ADDR_SURF_P2) |
3160 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3161 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3162 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163 				PIPE_CONFIG(ADDR_SURF_P2) |
3164 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3165 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3166 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3167 				PIPE_CONFIG(ADDR_SURF_P2) |
3168 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3169 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3170 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3171 				PIPE_CONFIG(ADDR_SURF_P2) |
3172 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3173 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3174 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175 				PIPE_CONFIG(ADDR_SURF_P2) |
3176 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3177 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3178 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3179 				PIPE_CONFIG(ADDR_SURF_P2) |
3180 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3181 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3182 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3183 				PIPE_CONFIG(ADDR_SURF_P2) |
3184 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3185 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3186 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3187 				PIPE_CONFIG(ADDR_SURF_P2));
3188 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3189 				PIPE_CONFIG(ADDR_SURF_P2) |
3190 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3191 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193 				 PIPE_CONFIG(ADDR_SURF_P2) |
3194 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3195 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3197 				 PIPE_CONFIG(ADDR_SURF_P2) |
3198 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3199 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3200 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3201 				 PIPE_CONFIG(ADDR_SURF_P2) |
3202 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3203 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3204 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3205 				 PIPE_CONFIG(ADDR_SURF_P2) |
3206 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3207 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3208 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3209 				 PIPE_CONFIG(ADDR_SURF_P2) |
3210 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3211 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3212 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3213 				 PIPE_CONFIG(ADDR_SURF_P2) |
3214 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3215 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3216 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3217 				 PIPE_CONFIG(ADDR_SURF_P2) |
3218 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3219 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3220 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3221 				 PIPE_CONFIG(ADDR_SURF_P2) |
3222 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3223 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3224 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3225 				 PIPE_CONFIG(ADDR_SURF_P2) |
3226 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3227 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3228 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3229 				 PIPE_CONFIG(ADDR_SURF_P2) |
3230 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3231 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3232 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3233 				 PIPE_CONFIG(ADDR_SURF_P2) |
3234 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3235 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3236 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3237 				 PIPE_CONFIG(ADDR_SURF_P2) |
3238 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3239 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3240 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3241 				 PIPE_CONFIG(ADDR_SURF_P2) |
3242 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3243 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3244 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3245 				 PIPE_CONFIG(ADDR_SURF_P2) |
3246 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3247 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3248 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3249 				 PIPE_CONFIG(ADDR_SURF_P2) |
3250 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3251 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3252 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3253 				 PIPE_CONFIG(ADDR_SURF_P2) |
3254 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3255 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3257 				 PIPE_CONFIG(ADDR_SURF_P2) |
3258 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3259 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3260 
3261 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3262 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3263 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3264 				NUM_BANKS(ADDR_SURF_8_BANK));
3265 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3266 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3267 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3268 				NUM_BANKS(ADDR_SURF_8_BANK));
3269 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3271 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3272 				NUM_BANKS(ADDR_SURF_8_BANK));
3273 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3274 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3275 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3276 				NUM_BANKS(ADDR_SURF_8_BANK));
3277 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3278 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3279 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3280 				NUM_BANKS(ADDR_SURF_8_BANK));
3281 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3284 				NUM_BANKS(ADDR_SURF_8_BANK));
3285 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3286 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3287 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3288 				NUM_BANKS(ADDR_SURF_8_BANK));
3289 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3290 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3291 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3292 				NUM_BANKS(ADDR_SURF_16_BANK));
3293 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3294 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3295 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296 				NUM_BANKS(ADDR_SURF_16_BANK));
3297 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3298 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3299 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300 				 NUM_BANKS(ADDR_SURF_16_BANK));
3301 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3302 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3303 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3304 				 NUM_BANKS(ADDR_SURF_16_BANK));
3305 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3306 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3307 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308 				 NUM_BANKS(ADDR_SURF_16_BANK));
3309 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3311 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312 				 NUM_BANKS(ADDR_SURF_16_BANK));
3313 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3314 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3315 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3316 				 NUM_BANKS(ADDR_SURF_8_BANK));
3317 
3318 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3319 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3320 			    reg_offset != 23)
3321 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3322 
3323 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3324 			if (reg_offset != 7)
3325 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3326 
3327 		break;
3328 	}
3329 }
3330 
3331 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3332 {
3333 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3334 
3335 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3336 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3337 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3338 	} else if (se_num == 0xffffffff) {
3339 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3340 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3341 	} else if (sh_num == 0xffffffff) {
3342 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3343 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3344 	} else {
3345 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3346 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3347 	}
3348 	WREG32(mmGRBM_GFX_INDEX, data);
3349 }
3350 
3351 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3352 {
3353 	return (u32)((1ULL << bit_width) - 1);
3354 }
3355 
3356 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3357 {
3358 	u32 data, mask;
3359 
3360 	data = RREG32(mmCC_RB_BACKEND_DISABLE);
3361 	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3362 
3363 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3364 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3365 
3366 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3367 				       adev->gfx.config.max_sh_per_se);
3368 
3369 	return (~data) & mask;
3370 }
3371 
3372 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3373 {
3374 	int i, j;
3375 	u32 data;
3376 	u32 active_rbs = 0;
3377 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3378 					adev->gfx.config.max_sh_per_se;
3379 
3380 	mutex_lock(&adev->grbm_idx_mutex);
3381 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3382 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3383 			gfx_v8_0_select_se_sh(adev, i, j);
3384 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3385 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3386 					       rb_bitmap_width_per_sh);
3387 		}
3388 	}
3389 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3390 	mutex_unlock(&adev->grbm_idx_mutex);
3391 
3392 	adev->gfx.config.backend_enable_mask = active_rbs;
3393 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3394 }
3395 
/**
 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM_CONFIG and SH_MEM_BASES registers for the
 * compute VMIDs.
 */
3404 #define DEFAULT_SH_MEM_BASES	(0x6000)
3405 #define FIRST_COMPUTE_VMID	(8)
3406 #define LAST_COMPUTE_VMID	(16)
3407 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3408 {
3409 	int i;
3410 	uint32_t sh_mem_config;
3411 	uint32_t sh_mem_bases;
3412 
3413 	/*
3414 	 * Configure apertures:
3415 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3416 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3417 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3418 	 */
3419 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3420 
3421 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3422 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3423 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3424 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3425 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3426 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3427 
3428 	mutex_lock(&adev->srbm_mutex);
3429 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3430 		vi_srbm_select(adev, 0, 0, 0, i);
3431 		/* CP and shaders */
3432 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3433 		WREG32(mmSH_MEM_APE1_BASE, 1);
3434 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3435 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3436 	}
3437 	vi_srbm_select(adev, 0, 0, 0, 0);
3438 	mutex_unlock(&adev->srbm_mutex);
3439 }
3440 
/*
 * One-time GFX block initialization: GRBM read timeout, address
 * configuration, tiling tables, RB/CU discovery and per-VMID SH_MEM
 * aperture setup.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	/* Raise the GRBM register read timeout to its maximum. */
	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* The same address-config word is mirrored to GB, HDP and DMIF. */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	/* Discover active render backends and CU topology. */
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached (UC) default/APE1 mtype. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* All other VMIDs: non-coherent (NC) mtype. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Re-program the compute VMIDs with the HSA64 aperture layout. */
	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
3507 
3508 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3509 {
3510 	u32 i, j, k;
3511 	u32 mask;
3512 
3513 	mutex_lock(&adev->grbm_idx_mutex);
3514 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3515 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3516 			gfx_v8_0_select_se_sh(adev, i, j);
3517 			for (k = 0; k < adev->usec_timeout; k++) {
3518 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3519 					break;
3520 				udelay(1);
3521 			}
3522 		}
3523 	}
3524 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3525 	mutex_unlock(&adev->grbm_idx_mutex);
3526 
3527 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3528 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3529 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3530 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3531 	for (k = 0; k < adev->usec_timeout; k++) {
3532 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3533 			break;
3534 		udelay(1);
3535 	}
3536 }
3537 
3538 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3539 					       bool enable)
3540 {
3541 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3542 
3543 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3544 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3545 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3546 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3547 
3548 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3549 }
3550 
3551 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3552 {
3553 	/* csib */
3554 	WREG32(mmRLC_CSIB_ADDR_HI,
3555 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3556 	WREG32(mmRLC_CSIB_ADDR_LO,
3557 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3558 	WREG32(mmRLC_CSIB_LENGTH,
3559 			adev->gfx.rlc.clear_state_size);
3560 }
3561 
/*
 * Compact the RLC indirect register-list format in place.
 *
 * Walks register_list_format from ind_offset up to list_size, recording the
 * starting offset of every indirect block in ind_start_offsets (blocks are
 * terminated by a 0xFFFFFFFF sentinel word) and building a table of unique
 * index registers in unique_indices.  Each entry's index word — assumed to
 * sit two words past the entry head (TODO confirm against the RLC format
 * spec) — is replaced by its position in unique_indices.
 *
 * indices_count and offset_count are cumulative in/out counters; BUG_ON
 * fires if either output table would exceed its max_* capacity.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		/* Record where this indirect block starts. */
		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* 0xFFFFFFFF terminates the current block. */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* Advance to the entry's index-register word. */
		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* First time we see this index register: append it. */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* Replace the raw register index with its compacted slot. */
		register_list_format[ind_offset] = indices;
	}
}
3611 
3612 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3613 {
3614 	int i, temp, data;
3615 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3616 	int indices_count = 0;
3617 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3618 	int offset_count = 0;
3619 
3620 	int list_size;
3621 	unsigned int *register_list_format =
3622 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3623 	if (register_list_format == NULL)
3624 		return -ENOMEM;
3625 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3626 			adev->gfx.rlc.reg_list_format_size_bytes);
3627 
3628 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3629 				RLC_FormatDirectRegListLength,
3630 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3631 				unique_indices,
3632 				&indices_count,
3633 				sizeof(unique_indices) / sizeof(int),
3634 				indirect_start_offsets,
3635 				&offset_count,
3636 				sizeof(indirect_start_offsets)/sizeof(int));
3637 
3638 	/* save and restore list */
3639 	temp = RREG32(mmRLC_SRM_CNTL);
3640 	temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3641 	WREG32(mmRLC_SRM_CNTL, temp);
3642 
3643 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3644 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3645 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3646 
3647 	/* indirect list */
3648 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3649 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3650 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3651 
3652 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3653 	list_size = list_size >> 1;
3654 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3655 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3656 
3657 	/* starting offsets starts */
3658 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3659 		adev->gfx.rlc.starting_offsets_start);
3660 	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3661 		WREG32(mmRLC_GPM_SCRATCH_DATA,
3662 				indirect_start_offsets[i]);
3663 
3664 	/* unique indices */
3665 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3666 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3667 	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3668 		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3669 		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3670 	}
3671 	kfree(register_list_format);
3672 
3673 	return 0;
3674 }
3675 
3676 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3677 {
3678 	uint32_t data;
3679 
3680 	data = RREG32(mmRLC_SRM_CNTL);
3681 	data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3682 	WREG32(mmRLC_SRM_CNTL, data);
3683 }
3684 
3685 static void polaris11_init_power_gating(struct amdgpu_device *adev)
3686 {
3687 	uint32_t data;
3688 
3689 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3690 			AMD_PG_SUPPORT_GFX_SMG |
3691 			AMD_PG_SUPPORT_GFX_DMG)) {
3692 		data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3693 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3694 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3695 		WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3696 
3697 		data = 0;
3698 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3699 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3700 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3701 		data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3702 		WREG32(mmRLC_PG_DELAY, data);
3703 
3704 		data = RREG32(mmRLC_PG_DELAY_2);
3705 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3706 		data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3707 		WREG32(mmRLC_PG_DELAY_2, data);
3708 
3709 		data = RREG32(mmRLC_AUTO_PG_CTRL);
3710 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3711 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3712 		WREG32(mmRLC_AUTO_PG_CTRL, data);
3713 	}
3714 }
3715 
3716 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3717 {
3718 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3719 			      AMD_PG_SUPPORT_GFX_SMG |
3720 			      AMD_PG_SUPPORT_GFX_DMG |
3721 			      AMD_PG_SUPPORT_CP |
3722 			      AMD_PG_SUPPORT_GDS |
3723 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3724 		gfx_v8_0_init_csb(adev);
3725 		gfx_v8_0_init_save_restore_list(adev);
3726 		gfx_v8_0_enable_save_restore_machine(adev);
3727 
3728 		if (adev->asic_type == CHIP_POLARIS11)
3729 			polaris11_init_power_gating(adev);
3730 	}
3731 }
3732 
3733 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3734 {
3735 	u32 tmp = RREG32(mmRLC_CNTL);
3736 
3737 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3738 	WREG32(mmRLC_CNTL, tmp);
3739 
3740 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3741 
3742 	gfx_v8_0_wait_for_rlc_serdes(adev);
3743 }
3744 
3745 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3746 {
3747 	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3748 
3749 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3750 	WREG32(mmGRBM_SOFT_RESET, tmp);
3751 	udelay(50);
3752 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3753 	WREG32(mmGRBM_SOFT_RESET, tmp);
3754 	udelay(50);
3755 }
3756 
3757 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3758 {
3759 	u32 tmp = RREG32(mmRLC_CNTL);
3760 
3761 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3762 	WREG32(mmRLC_CNTL, tmp);
3763 
3764 	/* carrizo do enable cp interrupt after cp inited */
3765 	if (!(adev->flags & AMD_IS_APU))
3766 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3767 
3768 	udelay(50);
3769 }
3770 
3771 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3772 {
3773 	const struct rlc_firmware_header_v2_0 *hdr;
3774 	const __le32 *fw_data;
3775 	unsigned i, fw_size;
3776 
3777 	if (!adev->gfx.rlc_fw)
3778 		return -EINVAL;
3779 
3780 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3781 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3782 
3783 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3784 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3785 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3786 
3787 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3788 	for (i = 0; i < fw_size; i++)
3789 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3790 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3791 
3792 	return 0;
3793 }
3794 
/*
 * Full RLC bring-up: stop the RLC, disable clock/power gating, soft-reset,
 * re-initialize the power-gating state, (re)load the RLC microcode and
 * restart.  Returns 0 on success or a negative error from firmware loading.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
	if (adev->asic_type == CHIP_POLARIS11 ||
		adev->asic_type == CHIP_POLARIS10)
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	gfx_v8_0_init_pg(adev);

	/* When powerplay drives the SMU, firmware loading is not ours. */
	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU-managed load: just wait for RLC_G to finish. */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
3832 
3833 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3834 {
3835 	int i;
3836 	u32 tmp = RREG32(mmCP_ME_CNTL);
3837 
3838 	if (enable) {
3839 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3840 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3841 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3842 	} else {
3843 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3844 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3845 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3846 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3847 			adev->gfx.gfx_ring[i].ready = false;
3848 	}
3849 	WREG32(mmCP_ME_CNTL, tmp);
3850 	udelay(50);
3851 }
3852 
3853 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3854 {
3855 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3856 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3857 	const struct gfx_firmware_header_v1_0 *me_hdr;
3858 	const __le32 *fw_data;
3859 	unsigned i, fw_size;
3860 
3861 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3862 		return -EINVAL;
3863 
3864 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3865 		adev->gfx.pfp_fw->data;
3866 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3867 		adev->gfx.ce_fw->data;
3868 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3869 		adev->gfx.me_fw->data;
3870 
3871 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3872 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3873 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3874 
3875 	gfx_v8_0_cp_gfx_enable(adev, false);
3876 
3877 	/* PFP */
3878 	fw_data = (const __le32 *)
3879 		(adev->gfx.pfp_fw->data +
3880 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3881 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3882 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
3883 	for (i = 0; i < fw_size; i++)
3884 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3885 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3886 
3887 	/* CE */
3888 	fw_data = (const __le32 *)
3889 		(adev->gfx.ce_fw->data +
3890 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3891 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3892 	WREG32(mmCP_CE_UCODE_ADDR, 0);
3893 	for (i = 0; i < fw_size; i++)
3894 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3895 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3896 
3897 	/* ME */
3898 	fw_data = (const __le32 *)
3899 		(adev->gfx.me_fw->data +
3900 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3901 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3902 	WREG32(mmCP_ME_RAM_WADDR, 0);
3903 	for (i = 0; i < fw_size; i++)
3904 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3905 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3906 
3907 	return 0;
3908 }
3909 
3910 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3911 {
3912 	u32 count = 0;
3913 	const struct cs_section_def *sect = NULL;
3914 	const struct cs_extent_def *ext = NULL;
3915 
3916 	/* begin clear state */
3917 	count += 2;
3918 	/* context control state */
3919 	count += 3;
3920 
3921 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3922 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3923 			if (sect->id == SECT_CONTEXT)
3924 				count += 2 + ext->reg_count;
3925 			else
3926 				return 0;
3927 		}
3928 	}
3929 	/* pa_sc_raster_config/pa_sc_raster_config1 */
3930 	count += 4;
3931 	/* end clear state */
3932 	count += 2;
3933 	/* clear state */
3934 	count += 2;
3935 
3936 	return count;
3937 }
3938 
/*
 * Initialize the gfx CP and emit the clear-state sequence on the gfx
 * ring: preamble begin, CONTEXT_CONTROL, the SECT_CONTEXT register
 * extents from vi_cs_data, the per-ASIC PA_SC_RASTER_CONFIG pair,
 * preamble end, CLEAR_STATE, and finally the CE partition bases.
 * The number of dwords written here must match what
 * gfx_v8_0_get_csb_size() reports (plus the 4 extra reserved in the
 * amdgpu_ring_alloc() call below).
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		/* unsupported ASIC reaching this code path is a driver bug */
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4030 
4031 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4032 {
4033 	struct amdgpu_ring *ring;
4034 	u32 tmp;
4035 	u32 rb_bufsz;
4036 	u64 rb_addr, rptr_addr;
4037 	int r;
4038 
4039 	/* Set the write pointer delay */
4040 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4041 
4042 	/* set the RB to use vmid 0 */
4043 	WREG32(mmCP_RB_VMID, 0);
4044 
4045 	/* Set ring buffer size */
4046 	ring = &adev->gfx.gfx_ring[0];
4047 	rb_bufsz = order_base_2(ring->ring_size / 8);
4048 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4049 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4050 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4051 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4052 #ifdef __BIG_ENDIAN
4053 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4054 #endif
4055 	WREG32(mmCP_RB0_CNTL, tmp);
4056 
4057 	/* Initialize the ring buffer's read and write pointers */
4058 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4059 	ring->wptr = 0;
4060 	WREG32(mmCP_RB0_WPTR, ring->wptr);
4061 
4062 	/* set the wb address wether it's enabled or not */
4063 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4064 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4065 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4066 
4067 	mdelay(1);
4068 	WREG32(mmCP_RB0_CNTL, tmp);
4069 
4070 	rb_addr = ring->gpu_addr >> 8;
4071 	WREG32(mmCP_RB0_BASE, rb_addr);
4072 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4073 
4074 	/* no gfx doorbells on iceland */
4075 	if (adev->asic_type != CHIP_TOPAZ) {
4076 		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4077 		if (ring->use_doorbell) {
4078 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4079 					    DOORBELL_OFFSET, ring->doorbell_index);
4080 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4081 					    DOORBELL_HIT, 0);
4082 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4083 					    DOORBELL_EN, 1);
4084 		} else {
4085 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4086 					    DOORBELL_EN, 0);
4087 		}
4088 		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4089 
4090 		if (adev->asic_type == CHIP_TONGA) {
4091 			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4092 					    DOORBELL_RANGE_LOWER,
4093 					    AMDGPU_DOORBELL_GFX_RING0);
4094 			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4095 
4096 			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4097 			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4098 		}
4099 
4100 	}
4101 
4102 	/* start the ring */
4103 	gfx_v8_0_cp_gfx_start(adev);
4104 	ring->ready = true;
4105 	r = amdgpu_ring_test_ring(ring);
4106 	if (r) {
4107 		ring->ready = false;
4108 		return r;
4109 	}
4110 
4111 	return 0;
4112 }
4113 
4114 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4115 {
4116 	int i;
4117 
4118 	if (enable) {
4119 		WREG32(mmCP_MEC_CNTL, 0);
4120 	} else {
4121 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4122 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4123 			adev->gfx.compute_ring[i].ready = false;
4124 	}
4125 	udelay(50);
4126 }
4127 
/*
 * Load the compute (MEC) microcode into the MEC1 ucode RAM, and into
 * MEC2 as well when a separate MEC2 image was fetched.  The compute
 * CP is halted first so the ucode RAM can be written.
 *
 * Returns 0 on success, -EINVAL if no MEC firmware is available.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* halt the compute CP before touching its ucode RAM */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4173 
/*
 * vi_mqd - Memory Queue Descriptor for VI (gfx v8) compute queues.
 *
 * A 256-dword image of the compute dispatch / HQD register state
 * followed by a 256-dword scratch area reserved for the CP ucode.
 * gfx_v8_0_cp_compute_resume() fills this structure and hands its
 * GPU address to the hardware via CP_MQD_BASE_ADDR, so the layout
 * (field order and sizes) must not be changed; the /* ordinalN */
 * comments give each dword's position within the image.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
4434 
4435 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4436 {
4437 	int i, r;
4438 
4439 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4440 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4441 
4442 		if (ring->mqd_obj) {
4443 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4444 			if (unlikely(r != 0))
4445 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4446 
4447 			amdgpu_bo_unpin(ring->mqd_obj);
4448 			amdgpu_bo_unreserve(ring->mqd_obj);
4449 
4450 			amdgpu_bo_unref(&ring->mqd_obj);
4451 			ring->mqd_obj = NULL;
4452 		}
4453 	}
4454 }
4455 
4456 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4457 {
4458 	int r, i, j;
4459 	u32 tmp;
4460 	bool use_doorbell = true;
4461 	u64 hqd_gpu_addr;
4462 	u64 mqd_gpu_addr;
4463 	u64 eop_gpu_addr;
4464 	u64 wb_gpu_addr;
4465 	u32 *buf;
4466 	struct vi_mqd *mqd;
4467 
4468 	/* init the pipes */
4469 	mutex_lock(&adev->srbm_mutex);
4470 	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4471 		int me = (i < 4) ? 1 : 2;
4472 		int pipe = (i < 4) ? i : (i - 4);
4473 
4474 		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4475 		eop_gpu_addr >>= 8;
4476 
4477 		vi_srbm_select(adev, me, pipe, 0, 0);
4478 
4479 		/* write the EOP addr */
4480 		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4481 		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4482 
4483 		/* set the VMID assigned */
4484 		WREG32(mmCP_HQD_VMID, 0);
4485 
4486 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4487 		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4488 		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4489 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
4490 		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4491 	}
4492 	vi_srbm_select(adev, 0, 0, 0, 0);
4493 	mutex_unlock(&adev->srbm_mutex);
4494 
4495 	/* init the queues.  Just two for now. */
4496 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4497 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4498 
4499 		if (ring->mqd_obj == NULL) {
4500 			r = amdgpu_bo_create(adev,
4501 					     sizeof(struct vi_mqd),
4502 					     PAGE_SIZE, true,
4503 					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4504 					     NULL, &ring->mqd_obj);
4505 			if (r) {
4506 				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4507 				return r;
4508 			}
4509 		}
4510 
4511 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4512 		if (unlikely(r != 0)) {
4513 			gfx_v8_0_cp_compute_fini(adev);
4514 			return r;
4515 		}
4516 		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4517 				  &mqd_gpu_addr);
4518 		if (r) {
4519 			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4520 			gfx_v8_0_cp_compute_fini(adev);
4521 			return r;
4522 		}
4523 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4524 		if (r) {
4525 			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4526 			gfx_v8_0_cp_compute_fini(adev);
4527 			return r;
4528 		}
4529 
4530 		/* init the mqd struct */
4531 		memset(buf, 0, sizeof(struct vi_mqd));
4532 
4533 		mqd = (struct vi_mqd *)buf;
4534 		mqd->header = 0xC0310800;
4535 		mqd->compute_pipelinestat_enable = 0x00000001;
4536 		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4537 		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4538 		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4539 		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4540 		mqd->compute_misc_reserved = 0x00000003;
4541 
4542 		mutex_lock(&adev->srbm_mutex);
4543 		vi_srbm_select(adev, ring->me,
4544 			       ring->pipe,
4545 			       ring->queue, 0);
4546 
4547 		/* disable wptr polling */
4548 		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4549 		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4550 		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4551 
4552 		mqd->cp_hqd_eop_base_addr_lo =
4553 			RREG32(mmCP_HQD_EOP_BASE_ADDR);
4554 		mqd->cp_hqd_eop_base_addr_hi =
4555 			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4556 
4557 		/* enable doorbell? */
4558 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4559 		if (use_doorbell) {
4560 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4561 		} else {
4562 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4563 		}
4564 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4565 		mqd->cp_hqd_pq_doorbell_control = tmp;
4566 
4567 		/* disable the queue if it's active */
4568 		mqd->cp_hqd_dequeue_request = 0;
4569 		mqd->cp_hqd_pq_rptr = 0;
4570 		mqd->cp_hqd_pq_wptr= 0;
4571 		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4572 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4573 			for (j = 0; j < adev->usec_timeout; j++) {
4574 				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4575 					break;
4576 				udelay(1);
4577 			}
4578 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4579 			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4580 			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4581 		}
4582 
4583 		/* set the pointer to the MQD */
4584 		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4585 		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4586 		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4587 		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4588 
4589 		/* set MQD vmid to 0 */
4590 		tmp = RREG32(mmCP_MQD_CONTROL);
4591 		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4592 		WREG32(mmCP_MQD_CONTROL, tmp);
4593 		mqd->cp_mqd_control = tmp;
4594 
4595 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4596 		hqd_gpu_addr = ring->gpu_addr >> 8;
4597 		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4598 		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4599 		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4600 		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4601 
4602 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4603 		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4604 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4605 				    (order_base_2(ring->ring_size / 4) - 1));
4606 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4607 			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4608 #ifdef __BIG_ENDIAN
4609 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4610 #endif
4611 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4612 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4613 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4614 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4615 		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4616 		mqd->cp_hqd_pq_control = tmp;
4617 
4618 		/* set the wb address wether it's enabled or not */
4619 		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4620 		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4621 		mqd->cp_hqd_pq_rptr_report_addr_hi =
4622 			upper_32_bits(wb_gpu_addr) & 0xffff;
4623 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4624 		       mqd->cp_hqd_pq_rptr_report_addr_lo);
4625 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4626 		       mqd->cp_hqd_pq_rptr_report_addr_hi);
4627 
4628 		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4629 		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4630 		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4631 		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4632 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4633 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4634 		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4635 
4636 		/* enable the doorbell if requested */
4637 		if (use_doorbell) {
4638 			if ((adev->asic_type == CHIP_CARRIZO) ||
4639 			    (adev->asic_type == CHIP_FIJI) ||
4640 			    (adev->asic_type == CHIP_STONEY) ||
4641 			    (adev->asic_type == CHIP_POLARIS11) ||
4642 			    (adev->asic_type == CHIP_POLARIS10)) {
4643 				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4644 				       AMDGPU_DOORBELL_KIQ << 2);
4645 				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4646 				       AMDGPU_DOORBELL_MEC_RING7 << 2);
4647 			}
4648 			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4649 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4650 					    DOORBELL_OFFSET, ring->doorbell_index);
4651 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4652 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4653 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4654 			mqd->cp_hqd_pq_doorbell_control = tmp;
4655 
4656 		} else {
4657 			mqd->cp_hqd_pq_doorbell_control = 0;
4658 		}
4659 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4660 		       mqd->cp_hqd_pq_doorbell_control);
4661 
4662 		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4663 		ring->wptr = 0;
4664 		mqd->cp_hqd_pq_wptr = ring->wptr;
4665 		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4666 		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4667 
4668 		/* set the vmid for the queue */
4669 		mqd->cp_hqd_vmid = 0;
4670 		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4671 
4672 		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4673 		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4674 		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4675 		mqd->cp_hqd_persistent_state = tmp;
4676 		if (adev->asic_type == CHIP_STONEY ||
4677 			adev->asic_type == CHIP_POLARIS11 ||
4678 			adev->asic_type == CHIP_POLARIS10) {
4679 			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4680 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4681 			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4682 		}
4683 
4684 		/* activate the queue */
4685 		mqd->cp_hqd_active = 1;
4686 		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4687 
4688 		vi_srbm_select(adev, 0, 0, 0, 0);
4689 		mutex_unlock(&adev->srbm_mutex);
4690 
4691 		amdgpu_bo_kunmap(ring->mqd_obj);
4692 		amdgpu_bo_unreserve(ring->mqd_obj);
4693 	}
4694 
4695 	if (use_doorbell) {
4696 		tmp = RREG32(mmCP_PQ_STATUS);
4697 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4698 		WREG32(mmCP_PQ_STATUS, tmp);
4699 	}
4700 
4701 	gfx_v8_0_cp_compute_enable(adev, true);
4702 
4703 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4704 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4705 
4706 		ring->ready = true;
4707 		r = amdgpu_ring_test_ring(ring);
4708 		if (r)
4709 			ring->ready = false;
4710 	}
4711 
4712 	return 0;
4713 }
4714 
4715 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4716 {
4717 	int r;
4718 
4719 	if (!(adev->flags & AMD_IS_APU))
4720 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4721 
4722 	if (!adev->pp_enabled) {
4723 		if (!adev->firmware.smu_load) {
4724 			/* legacy firmware loading */
4725 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
4726 			if (r)
4727 				return r;
4728 
4729 			r = gfx_v8_0_cp_compute_load_microcode(adev);
4730 			if (r)
4731 				return r;
4732 		} else {
4733 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4734 							AMDGPU_UCODE_ID_CP_CE);
4735 			if (r)
4736 				return -EINVAL;
4737 
4738 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4739 							AMDGPU_UCODE_ID_CP_PFP);
4740 			if (r)
4741 				return -EINVAL;
4742 
4743 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4744 							AMDGPU_UCODE_ID_CP_ME);
4745 			if (r)
4746 				return -EINVAL;
4747 
4748 			if (adev->asic_type == CHIP_TOPAZ) {
4749 				r = gfx_v8_0_cp_compute_load_microcode(adev);
4750 				if (r)
4751 					return r;
4752 			} else {
4753 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4754 										 AMDGPU_UCODE_ID_CP_MEC1);
4755 				if (r)
4756 					return -EINVAL;
4757 			}
4758 		}
4759 	}
4760 
4761 	r = gfx_v8_0_cp_gfx_resume(adev);
4762 	if (r)
4763 		return r;
4764 
4765 	r = gfx_v8_0_cp_compute_resume(adev);
4766 	if (r)
4767 		return r;
4768 
4769 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4770 
4771 	return 0;
4772 }
4773 
/* Enable or disable both CP front ends (gfx and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4779 
/*
 * gfx_v8_0_hw_init - GFX IP block hw_init callback
 * @handle: amdgpu device pointer (as void *, per the IP block interface)
 *
 * Programs the golden register settings, initializes the GPU config,
 * then resumes the RLC followed by the CP.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC must be running before the CP is brought up */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4799 
/*
 * gfx_v8_0_hw_fini - GFX IP block hw_fini callback
 * @handle: amdgpu device pointer (as void *, per the IP block interface)
 *
 * Drops the interrupt references taken in late_init, halts the CP and
 * RLC, tears down the compute MQDs and ungates GFX power.
 *
 * Returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* drop the references taken on the priv reg/inst interrupts */
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	/* halt both CP front ends before stopping the RLC */
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* leave GFX ungated while the block is down */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4815 
/* Suspend the GFX block: identical to a full hw teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4822 
/* Resume the GFX block: identical to a full hw bring-up. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4829 
4830 static bool gfx_v8_0_is_idle(void *handle)
4831 {
4832 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4833 
4834 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4835 		return false;
4836 	else
4837 		return true;
4838 }
4839 
4840 static int gfx_v8_0_wait_for_idle(void *handle)
4841 {
4842 	unsigned i;
4843 	u32 tmp;
4844 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4845 
4846 	for (i = 0; i < adev->usec_timeout; i++) {
4847 		/* read MC_STATUS */
4848 		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4849 
4850 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4851 			return 0;
4852 		udelay(1);
4853 	}
4854 	return -ETIMEDOUT;
4855 }
4856 
/*
 * gfx_v8_0_soft_reset - soft reset the GFX block
 * @handle: amdgpu device pointer (as void *, per the IP block interface)
 *
 * Inspects GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS to decide which soft
 * reset bits are needed, then halts the RLC and both CP front ends and
 * pulses the reset bits with a GMCON stall around the sequence.
 *
 * Returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy GFX pipeline unit -> reset CP and GFX */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	/* a busy CP additionally needs a GRBM reset via SRBM */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: busy RLC -> reset RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS: pending GRBM requests -> reset GRBM */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		/* stall GFX traffic at the memory controller before resetting;
		 * this inner condition is always true inside the outer branch,
		 * kept symmetric with the un-stall block below */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		/* pulse the GRBM reset bits: set, wait, clear; the extra
		 * reads post the writes */
		if (grbm_soft_reset) {
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		/* same pulse for the SRBM reset bits */
		if (srbm_soft_reset) {
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		/* release the GMCON stall */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}
4959 
4960 /**
4961  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4962  *
4963  * @adev: amdgpu_device pointer
4964  *
4965  * Fetches a GPU clock counter snapshot.
4966  * Returns the 64 bit clock counter snapshot.
4967  */
4968 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4969 {
4970 	uint64_t clock;
4971 
4972 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4973 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4974 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4975 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4976 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4977 	return clock;
4978 }
4979 
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA setup packets for a vmid
 * @ring: ring to emit on
 * @vmid: vmid whose GDS registers are programmed
 * @gds_base/@gds_size: GDS allocation (bytes, converted via AMDGPU_GDS_SHIFT)
 * @gws_base/@gws_size: GWS allocation (converted via AMDGPU_GWS_SHIFT)
 * @oa_base/@oa_size: OA allocation (converted via AMDGPU_OA_SHIFT)
 *
 * Emits four WRITE_DATA packets that program the per-vmid GDS base/size,
 * GWS and OA registers (offsets taken from amdgpu_gds_reg_offset[vmid]).
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* convert the raw values into the units the registers take */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one register value */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: written as a contiguous bitmask of oa_size bits from oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5027 
/*
 * gfx_v8_0_early_init - GFX IP block early_init callback
 * @handle: amdgpu device pointer (as void *, per the IP block interface)
 *
 * Sets the gfx/compute ring counts and installs the ring, irq, GDS
 * and RLC function tables for this GFX IP version.
 *
 * Returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5041 
/*
 * gfx_v8_0_late_init - GFX IP block late_init callback
 * @handle: amdgpu device pointer (as void *, per the IP block interface)
 *
 * Enables the privileged register/instruction fault interrupts, runs
 * the EDC GPR workarounds (which need the IB pool, hence late init)
 * and finally gates GFX power.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5065 
5066 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5067 		bool enable)
5068 {
5069 	uint32_t data, temp;
5070 
5071 	/* Send msg to SMU via Powerplay */
5072 	amdgpu_set_powergating_state(adev,
5073 			AMD_IP_BLOCK_TYPE_SMC,
5074 			enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5075 
5076 	if (enable) {
5077 		/* Enable static MGPG */
5078 		temp = data = RREG32(mmRLC_PG_CNTL);
5079 		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5080 
5081 		if (temp != data)
5082 			WREG32(mmRLC_PG_CNTL, data);
5083 	} else {
5084 		temp = data = RREG32(mmRLC_PG_CNTL);
5085 		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5086 
5087 		if (temp != data)
5088 			WREG32(mmRLC_PG_CNTL, data);
5089 	}
5090 }
5091 
5092 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5093 		bool enable)
5094 {
5095 	uint32_t data, temp;
5096 
5097 	if (enable) {
5098 		/* Enable dynamic MGPG */
5099 		temp = data = RREG32(mmRLC_PG_CNTL);
5100 		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5101 
5102 		if (temp != data)
5103 			WREG32(mmRLC_PG_CNTL, data);
5104 	} else {
5105 		temp = data = RREG32(mmRLC_PG_CNTL);
5106 		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5107 
5108 		if (temp != data)
5109 			WREG32(mmRLC_PG_CNTL, data);
5110 	}
5111 }
5112 
5113 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5114 		bool enable)
5115 {
5116 	uint32_t data, temp;
5117 
5118 	if (enable) {
5119 		/* Enable quick PG */
5120 		temp = data = RREG32(mmRLC_PG_CNTL);
5121 		data |= 0x100000;
5122 
5123 		if (temp != data)
5124 			WREG32(mmRLC_PG_CNTL, data);
5125 	} else {
5126 		temp = data = RREG32(mmRLC_PG_CNTL);
5127 		data &= ~0x100000;
5128 
5129 		if (temp != data)
5130 			WREG32(mmRLC_PG_CNTL, data);
5131 	}
5132 }
5133 
5134 static int gfx_v8_0_set_powergating_state(void *handle,
5135 					  enum amd_powergating_state state)
5136 {
5137 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5138 
5139 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5140 		return 0;
5141 
5142 	switch (adev->asic_type) {
5143 	case CHIP_POLARIS11:
5144 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5145 			polaris11_enable_gfx_static_mg_power_gating(adev,
5146 					state == AMD_PG_STATE_GATE ? true : false);
5147 		else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5148 			polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5149 					state == AMD_PG_STATE_GATE ? true : false);
5150 		else
5151 			polaris11_enable_gfx_quick_mg_power_gating(adev,
5152 					state == AMD_PG_STATE_GATE ? true : false);
5153 		break;
5154 	default:
5155 		break;
5156 	}
5157 
5158 	return 0;
5159 }
5160 
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM serdes command via the RLC
 * @adev: amdgpu device pointer
 * @reg_addr: BPM register address to target
 * @cmd: BPM data/command to send
 *
 * Selects all SEs/SHs, addresses every CU and non-CU serdes master, and
 * programs RLC_SERDES_WR_CTRL with the given command and register address.
 * Stoney deliberately keeps its existing BPM_DATA/REG_ADDR bits instead of
 * clearing them first (the masks are absent from its clear list).
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all shader engines / shader arrays */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
			data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5201 
/* Messages posted through RLC_GPR_REG2 to request safe-mode transitions */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0

/* RLC_GPR_REG2 layout: bit 0 is the REQ handshake bit, bits 1-4 carry
 * the message payload */
#define RLC_GPR_REG2__REQ_MASK           0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5208 
/*
 * cz_enter_rlc_safe_mode - request RLC safe mode via the GPR_REG2 handshake
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is not running, or when none of the CG/PG
 * features that require safe mode are enabled.  Otherwise posts
 * MSG_ENTER_RLC_SAFE_MODE with the REQ bit set, waits for both the GFX
 * clock and power status bits in RLC_GPM_STAT, then waits for the RLC
 * to acknowledge by clearing REQ.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait until both GFX clock and power status bits are set */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the request (REQ cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5244 
/*
 * cz_exit_rlc_safe_mode - release RLC safe mode via the GPR_REG2 handshake
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is not running.  Posts
 * MSG_EXIT_RLC_SAFE_MODE when the relevant CG/PG features are enabled.
 *
 * NOTE(review): the final REQ-ack poll runs even when no exit message
 * was sent (it sits outside the feature-flag branch) — confirm whether
 * that is intentional; in that case REQ is expected to already be clear.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to ack (REQ cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
			break;
		udelay(1);
	}
}
5270 
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode via RLC_SAFE_MODE
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is not running or when neither CGCG nor
 * MGCG is enabled.  Otherwise writes the CMD bit plus message 1 to
 * RLC_SAFE_MODE, waits for the GFX clock/power status bits in
 * RLC_GPM_STAT, then waits for the RLC to clear CMD.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until both GFX clock and power status bits are set */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the command (CMD cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5304 
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode via RLC_SAFE_MODE
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is not running.  Writes the CMD bit with
 * a zero message when safe mode was previously entered.
 *
 * NOTE(review): the final CMD-ack poll runs even when nothing was
 * written (it sits outside both if blocks) — confirm whether that is
 * intentional; in that case CMD is expected to already be clear.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack (CMD cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
			break;
		udelay(1);
	}
}
5329 
/* No-op safe-mode entry: only tracks the state flag, touches no hardware. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5334 
/* No-op safe-mode exit: only tracks the state flag, touches no hardware. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5339 
/* Safe-mode handlers using the RLC_GPR_REG2 message handshake */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5344 
/* Safe-mode handlers using the RLC_SAFE_MODE register */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5349 
/* Safe-mode handlers that only track state (no hardware handshake) */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5354 
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium grain clock gating, false to disable
 *
 * Runs the ordered enable/disable sequence (numbered steps below)
 * under RLC safe mode.  The registers are only rewritten when the
 * computed value actually differs from the current one.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				/* 1 - RLC memory Light sleep */
				temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmRLC_MEM_SLP_CNTL, data);
			}

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				/* 2 - CP memory Light sleep */
				temp = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE (APUs keep the GRBM override set) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5467 
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse grain clock gating, false to disable
 *
 * Runs the ordered enable/disable sequence (numbered steps below)
 * under RLC safe mode; RLC_CGCG_CGLS_CTRL is only rewritten when its
 * value actually changes.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5556 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5557 					    bool enable)
5558 {
5559 	if (enable) {
5560 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5561 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5562 		 */
5563 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5564 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5565 	} else {
5566 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5567 		 * ===  CGCG + CGLS ===
5568 		 */
5569 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5570 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5571 	}
5572 	return 0;
5573 }
5574 
5575 static int gfx_v8_0_set_clockgating_state(void *handle,
5576 					  enum amd_clockgating_state state)
5577 {
5578 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5579 
5580 	switch (adev->asic_type) {
5581 	case CHIP_FIJI:
5582 	case CHIP_CARRIZO:
5583 	case CHIP_STONEY:
5584 		gfx_v8_0_update_gfx_clock_gating(adev,
5585 						 state == AMD_CG_STATE_GATE ? true : false);
5586 		break;
5587 	default:
5588 		break;
5589 	}
5590 	return 0;
5591 }
5592 
5593 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5594 {
5595 	u32 rptr;
5596 
5597 	rptr = ring->adev->wb.wb[ring->rptr_offs];
5598 
5599 	return rptr;
5600 }
5601 
5602 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5603 {
5604 	struct amdgpu_device *adev = ring->adev;
5605 	u32 wptr;
5606 
5607 	if (ring->use_doorbell)
5608 		/* XXX check if swapping is necessary on BE */
5609 		wptr = ring->adev->wb.wb[ring->wptr_offs];
5610 	else
5611 		wptr = RREG32(mmCP_RB0_WPTR);
5612 
5613 	return wptr;
5614 }
5615 
/* Publish a new gfx ring write pointer to the CP, either through the
 * doorbell (WB copy updated first, then doorbell rung) or directly via
 * the CP_RB0_WPTR register.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		/* update the shadow copy before ringing the doorbell */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back to flush the posted register write */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
5629 
/* Emit a PM4 WAIT_REG_MEM packet that requests an HDP flush and polls
 * GPU_HDP_FLUSH_DONE until the per-client done bit matches.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* each compute pipe has its own flush-done bit, derived by
		 * shifting the ME's base (CP2 for ME1, CP6 for ME2) by pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	/* write ref_and_mask to FLUSH_REQ, then wait until
	 * (FLUSH_DONE & mask) == ref */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
5661 
/* Emit a WRITE_DATA packet storing 1 to HDP_DEBUG0.
 * NOTE(review): writing HDP_DEBUG0 appears to trigger an HDP read-cache
 * invalidate on VI parts — confirm against the HDP register spec.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
5673 
/* Emit an indirect buffer on the gfx ring: first publish the expected
 * post-IB read pointer via WRITE_DATA, optionally emit a SWITCH_BUFFER
 * for a context switch, then the INDIRECT_BUFFER(_CONST) packet itself.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;
	/* 5 dwords for the WRITE_DATA packet below */
	u32 next_rptr = ring->wptr + 5;

	if (ctx_switch)
		next_rptr += 2;	/* plus the 2-dword SWITCH_BUFFER */

	next_rptr += 4;		/* plus the 4-dword IB packet */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* constant-engine IBs use the CONST variant of the packet */
	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords plus the target VM id in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5713 
/* Emit an indirect buffer on a compute ring: publish the expected
 * post-IB read pointer via WRITE_DATA, then the INDIRECT_BUFFER packet.
 * Unlike the gfx path there is no context switch / CE handling.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;
	/* 5 dwords for the WRITE_DATA packet below */
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	next_rptr += 4;		/* plus the 4-dword IB packet */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords plus the target VM id in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5743 
/* Emit a fence on the gfx ring using EVENT_WRITE_EOP: flush TC/TCL1
 * caches, write the 32- or 64-bit sequence number to @addr, and
 * optionally raise an interrupt, all controlled by @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL: 2 = 64-bit seq, 1 = 32-bit; INT_SEL: 2 = int + data */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
5764 
/* Emit a pipeline sync: wait (in PFP for gfx, ME for compute) until the
 * ring's own fence memory reaches the last emitted sequence number.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);	/* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before the
		 * context switch is done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
5789 
/* Emit a VM TLB flush: update the page-table base register for @vm_id,
 * request an invalidate for that VM context, wait for it to complete,
 * and (gfx only) resync PFP with ME afterwards.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
5840 
5841 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5842 {
5843 	return ring->adev->wb.wb[ring->rptr_offs];
5844 }
5845 
5846 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5847 {
5848 	return ring->adev->wb.wb[ring->wptr_offs];
5849 }
5850 
/* Publish a new compute ring write pointer: update the WB shadow copy
 * first, then ring the doorbell so the CP picks it up.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
5859 
/* Emit a fence on a compute ring using the RELEASE_MEM packet (the MEC
 * equivalent of EVENT_WRITE_EOP): flush TC/TCL1 caches, write the 32- or
 * 64-bit sequence number to @addr, and optionally raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL: 2 = 64-bit seq, 1 = 32-bit; INT_SEL: 2 = int + data */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
5880 
5881 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5882 						 enum amdgpu_interrupt_state state)
5883 {
5884 	u32 cp_int_cntl;
5885 
5886 	switch (state) {
5887 	case AMDGPU_IRQ_STATE_DISABLE:
5888 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5889 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5890 					    TIME_STAMP_INT_ENABLE, 0);
5891 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5892 		break;
5893 	case AMDGPU_IRQ_STATE_ENABLE:
5894 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5895 		cp_int_cntl =
5896 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5897 				      TIME_STAMP_INT_ENABLE, 1);
5898 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5899 		break;
5900 	default:
5901 		break;
5902 	}
5903 }
5904 
5905 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5906 						     int me, int pipe,
5907 						     enum amdgpu_interrupt_state state)
5908 {
5909 	u32 mec_int_cntl, mec_int_cntl_reg;
5910 
5911 	/*
5912 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
5913 	 * handles the setting of interrupts for this specific pipe. All other
5914 	 * pipes' interrupts are set by amdkfd.
5915 	 */
5916 
5917 	if (me == 1) {
5918 		switch (pipe) {
5919 		case 0:
5920 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5921 			break;
5922 		default:
5923 			DRM_DEBUG("invalid pipe %d\n", pipe);
5924 			return;
5925 		}
5926 	} else {
5927 		DRM_DEBUG("invalid me %d\n", me);
5928 		return;
5929 	}
5930 
5931 	switch (state) {
5932 	case AMDGPU_IRQ_STATE_DISABLE:
5933 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5934 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5935 					     TIME_STAMP_INT_ENABLE, 0);
5936 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5937 		break;
5938 	case AMDGPU_IRQ_STATE_ENABLE:
5939 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5940 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5941 					     TIME_STAMP_INT_ENABLE, 1);
5942 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5943 		break;
5944 	default:
5945 		break;
5946 	}
5947 }
5948 
5949 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5950 					     struct amdgpu_irq_src *source,
5951 					     unsigned type,
5952 					     enum amdgpu_interrupt_state state)
5953 {
5954 	u32 cp_int_cntl;
5955 
5956 	switch (state) {
5957 	case AMDGPU_IRQ_STATE_DISABLE:
5958 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5959 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5960 					    PRIV_REG_INT_ENABLE, 0);
5961 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5962 		break;
5963 	case AMDGPU_IRQ_STATE_ENABLE:
5964 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5965 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5966 					    PRIV_REG_INT_ENABLE, 1);
5967 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5968 		break;
5969 	default:
5970 		break;
5971 	}
5972 
5973 	return 0;
5974 }
5975 
5976 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5977 					      struct amdgpu_irq_src *source,
5978 					      unsigned type,
5979 					      enum amdgpu_interrupt_state state)
5980 {
5981 	u32 cp_int_cntl;
5982 
5983 	switch (state) {
5984 	case AMDGPU_IRQ_STATE_DISABLE:
5985 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5986 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5987 					    PRIV_INSTR_INT_ENABLE, 0);
5988 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5989 		break;
5990 	case AMDGPU_IRQ_STATE_ENABLE:
5991 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5992 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5993 					    PRIV_INSTR_INT_ENABLE, 1);
5994 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5995 		break;
5996 	default:
5997 		break;
5998 	}
5999 
6000 	return 0;
6001 }
6002 
/* Dispatch an EOP interrupt enable/disable request to the right engine:
 * the gfx ring or one of the eight MEC pipes (me 1-2, pipe 0-3).
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6041 
6042 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6043 			    struct amdgpu_irq_src *source,
6044 			    struct amdgpu_iv_entry *entry)
6045 {
6046 	int i;
6047 	u8 me_id, pipe_id, queue_id;
6048 	struct amdgpu_ring *ring;
6049 
6050 	DRM_DEBUG("IH: CP EOP\n");
6051 	me_id = (entry->ring_id & 0x0c) >> 2;
6052 	pipe_id = (entry->ring_id & 0x03) >> 0;
6053 	queue_id = (entry->ring_id & 0x70) >> 4;
6054 
6055 	switch (me_id) {
6056 	case 0:
6057 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6058 		break;
6059 	case 1:
6060 	case 2:
6061 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6062 			ring = &adev->gfx.compute_ring[i];
6063 			/* Per-queue interrupt is supported for MEC starting from VI.
6064 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6065 			  */
6066 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6067 				amdgpu_fence_process(ring);
6068 		}
6069 		break;
6070 	}
6071 	return 0;
6072 }
6073 
/* Privileged-register-fault handler: a command stream touched a
 * protected register, so schedule a GPU reset.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6082 
/* Privileged-instruction-fault handler: a command stream issued an
 * illegal instruction, so schedule a GPU reset.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6091 
/* IP-block callbacks for the GFX v8 block (init/fini, suspend/resume,
 * reset, and clock/power gating entry points).
 */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6108 
/* Ring callbacks for the gfx ring (EVENT_WRITE_EOP fences, CE/DE IBs). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6126 
/* Ring callbacks for MEC compute rings (RELEASE_MEM fences, doorbells). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6144 
6145 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6146 {
6147 	int i;
6148 
6149 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6150 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6151 
6152 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6153 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6154 }
6155 
/* Interrupt source tables: state setter + handler pairs for EOP,
 * privileged-register and privileged-instruction faults.
 */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6170 
/* Register the gfx interrupt sources with their function tables. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one EOP type per gfx/compute ring target */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6182 
6183 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6184 {
6185 	switch (adev->asic_type) {
6186 	case CHIP_TOPAZ:
6187 	case CHIP_STONEY:
6188 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6189 		break;
6190 	case CHIP_CARRIZO:
6191 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6192 		break;
6193 	default:
6194 		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6195 		break;
6196 	}
6197 }
6198 
/* Initialize GDS (global data share) sizing: read the total GDS memory
 * from the hardware and split memory/GWS/OA between gfx and CS use.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	/* 64KB parts get smaller per-client partitions than larger ones */
	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6226 
/* Return a bitmask of active CUs for the currently selected SE/SH:
 * combine the fused-off and user-disabled INACTIVE_CUS fields, invert
 * them, and clamp to the per-SH CU count.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	/* inactive bits set -> invert to get the active CU bitmap */
	return (~data) & mask;
}
6241 
/* Walk every shader engine / shader array, record each SH's active-CU
 * bitmap, count active CUs, and build the "always on" CU mask (the
 * first two active CUs of each SH are marked always-on).
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));

	/* grbm_idx_mutex guards the GRBM SE/SH selection below */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* scan up to 16 CUs per SH; first 2 active are AO */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* NOTE(review): packing at (i * 16 + j * 8) overlaps
			 * when an SE has >1 SH of 16 CUs — assumes <=8 CUs
			 * per SH in that case; verify against ASIC config */
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6278