xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 0edbfea5)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "clearstate_vi.h"
32 
33 #include "gmc/gmc_8_2_d.h"
34 #include "gmc/gmc_8_2_sh_mask.h"
35 
36 #include "oss/oss_3_0_d.h"
37 #include "oss/oss_3_0_sh_mask.h"
38 
39 #include "bif/bif_5_0_d.h"
40 #include "bif/bif_5_0_sh_mask.h"
41 
42 #include "gca/gfx_8_0_d.h"
43 #include "gca/gfx_8_0_enum.h"
44 #include "gca/gfx_8_0_sh_mask.h"
45 #include "gca/gfx_8_0_enum.h"
46 
47 #include "dce/dce_10_0_d.h"
48 #include "dce/dce_10_0_sh_mask.h"
49 
/* Ring topology for GFX8: one graphics ring and eight compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Reference ("golden") GB_ADDR_CONFIG values per ASIC family. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers that position a field value at its bit offset within the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 tiling-configuration registers. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-feature clockgating override bits in RLC_CGTT_MGCG_OVERRIDE,
 * defined locally (NOTE(review): presumably absent from the register
 * headers used here — confirm against gca/gfx_8_0_sh_mask.h). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set (1) or clear (0) a BPM register via the serdes path. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Length of the RLC "format direct" register list (in entries). */
#define RLC_FormatDirectRegListLength        14
90 
/* GFX firmware images (CE/PFP/ME/MEC/MEC2/RLC) requested per ASIC.
 * Note Topaz and Stoney carry no mec2 image in this list. */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
137 
/* GDS register offsets per VMID: {base, size, GWS, OA}, indexed by VMID 0-15. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
157 
/* Tonga A11 golden register settings: {reg, and_mask, or_value} triples
 * applied by amdgpu_program_register_sequence() at init. */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
176 
/* Tonga common config (raster config, GB_ADDR_CONFIG, SPI CU reservation);
 * {reg, and_mask, or_value} triples. */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
188 
/* Tonga medium-grain / coarse-grain clockgating init sequence;
 * {reg, and_mask, or_value} triples.  The two mmGRBM_GFX_INDEX writes
 * broadcast the following writes to all SEs/SHs. */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* Per-CU (CU0-CU7) clockgating control registers. */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
267 
/* Polaris11 A11 golden register settings; {reg, and_mask, or_value} triples. */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
};
286 
/* Polaris11 common config; {reg, and_mask, or_value} triples. */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
296 
/* Polaris10 A11 golden register settings; {reg, and_mask, or_value} triples. */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
};
315 
/* Polaris10 common config; {reg, and_mask, or_value} triples. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
327 
/* Fiji common config; {reg, and_mask, or_value} triples. */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
341 
/* Fiji A10 golden register settings; {reg, and_mask, or_value} triples. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
356 
/* Fiji MGCG/CGCG clockgating init sequence; {reg, and_mask, or_value}
 * triples.  Unlike Tonga's table there are no per-CU CGTS entries. */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
395 
/* Iceland (Topaz) A11 golden register settings; {reg, and_mask, or_value}
 * triples. */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
414 
/* Iceland (Topaz) common config; {reg, and_mask, or_value} triples. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
426 
/* Iceland (Topaz) MGCG/CGCG clockgating init sequence; {reg, and_mask,
 * or_value} triples.  Covers CU0-CU5 only; note the distinct CP/CPC/CPF
 * (0xc0000100), TCI (0xff000100) and CU0/CU4 TA_SQC (0x0f840f87) values
 * compared to the Tonga/Carrizo tables. */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
494 
/* Carrizo A11 golden register settings; {reg, and_mask, or_value} triples. */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
508 
/* Carrizo common config; {reg, and_mask, or_value} triples. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
520 
/* Carrizo MGCG/CGCG clockgating init sequence; {reg, and_mask, or_value}
 * triples, covering CU0-CU7. */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
599 
/* Stoney A11 golden register settings; {reg, and_mask, or_value} triples. */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
613 
/* Stoney common config; {reg, and_mask, or_value} triples. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
625 
/* Stoney MGCG/CGCG clockgating init sequence (much shorter than the other
 * ASICs'); {reg, and_mask, or_value} triples. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
635 
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
642 
643 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
644 {
645 	switch (adev->asic_type) {
646 	case CHIP_TOPAZ:
647 		amdgpu_program_register_sequence(adev,
648 						 iceland_mgcg_cgcg_init,
649 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
650 		amdgpu_program_register_sequence(adev,
651 						 golden_settings_iceland_a11,
652 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
653 		amdgpu_program_register_sequence(adev,
654 						 iceland_golden_common_all,
655 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
656 		break;
657 	case CHIP_FIJI:
658 		amdgpu_program_register_sequence(adev,
659 						 fiji_mgcg_cgcg_init,
660 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
661 		amdgpu_program_register_sequence(adev,
662 						 golden_settings_fiji_a10,
663 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
664 		amdgpu_program_register_sequence(adev,
665 						 fiji_golden_common_all,
666 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
667 		break;
668 
669 	case CHIP_TONGA:
670 		amdgpu_program_register_sequence(adev,
671 						 tonga_mgcg_cgcg_init,
672 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
673 		amdgpu_program_register_sequence(adev,
674 						 golden_settings_tonga_a11,
675 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
676 		amdgpu_program_register_sequence(adev,
677 						 tonga_golden_common_all,
678 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
679 		break;
680 	case CHIP_POLARIS11:
681 		amdgpu_program_register_sequence(adev,
682 						 golden_settings_polaris11_a11,
683 						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
684 		amdgpu_program_register_sequence(adev,
685 						 polaris11_golden_common_all,
686 						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
687 		break;
688 	case CHIP_POLARIS10:
689 		amdgpu_program_register_sequence(adev,
690 						 golden_settings_polaris10_a11,
691 						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
692 		amdgpu_program_register_sequence(adev,
693 						 polaris10_golden_common_all,
694 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
695 		break;
696 	case CHIP_CARRIZO:
697 		amdgpu_program_register_sequence(adev,
698 						 cz_mgcg_cgcg_init,
699 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
700 		amdgpu_program_register_sequence(adev,
701 						 cz_golden_settings_a11,
702 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
703 		amdgpu_program_register_sequence(adev,
704 						 cz_golden_common_all,
705 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
706 		break;
707 	case CHIP_STONEY:
708 		amdgpu_program_register_sequence(adev,
709 						 stoney_mgcg_cgcg_init,
710 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
711 		amdgpu_program_register_sequence(adev,
712 						 stoney_golden_settings_a11,
713 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
714 		amdgpu_program_register_sequence(adev,
715 						 stoney_golden_common_all,
716 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
717 		break;
718 	default:
719 		break;
720 	}
721 }
722 
723 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
724 {
725 	int i;
726 
727 	adev->gfx.scratch.num_reg = 7;
728 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
729 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
730 		adev->gfx.scratch.free[i] = true;
731 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
732 	}
733 }
734 
735 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
736 {
737 	struct amdgpu_device *adev = ring->adev;
738 	uint32_t scratch;
739 	uint32_t tmp = 0;
740 	unsigned i;
741 	int r;
742 
743 	r = amdgpu_gfx_scratch_get(adev, &scratch);
744 	if (r) {
745 		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
746 		return r;
747 	}
748 	WREG32(scratch, 0xCAFEDEAD);
749 	r = amdgpu_ring_alloc(ring, 3);
750 	if (r) {
751 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
752 			  ring->idx, r);
753 		amdgpu_gfx_scratch_free(adev, scratch);
754 		return r;
755 	}
756 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
757 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
758 	amdgpu_ring_write(ring, 0xDEADBEEF);
759 	amdgpu_ring_commit(ring);
760 
761 	for (i = 0; i < adev->usec_timeout; i++) {
762 		tmp = RREG32(scratch);
763 		if (tmp == 0xDEADBEEF)
764 			break;
765 		DRM_UDELAY(1);
766 	}
767 	if (i < adev->usec_timeout) {
768 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
769 			 ring->idx, i);
770 	} else {
771 		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
772 			  ring->idx, scratch, tmp);
773 		r = -EINVAL;
774 	}
775 	amdgpu_gfx_scratch_free(adev, scratch);
776 	return r;
777 }
778 
779 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
780 {
781 	struct amdgpu_device *adev = ring->adev;
782 	struct amdgpu_ib ib;
783 	struct fence *f = NULL;
784 	uint32_t scratch;
785 	uint32_t tmp = 0;
786 	unsigned i;
787 	int r;
788 
789 	r = amdgpu_gfx_scratch_get(adev, &scratch);
790 	if (r) {
791 		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
792 		return r;
793 	}
794 	WREG32(scratch, 0xCAFEDEAD);
795 	memset(&ib, 0, sizeof(ib));
796 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
797 	if (r) {
798 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
799 		goto err1;
800 	}
801 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
802 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
803 	ib.ptr[2] = 0xDEADBEEF;
804 	ib.length_dw = 3;
805 
806 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
807 	if (r)
808 		goto err2;
809 
810 	r = fence_wait(f, false);
811 	if (r) {
812 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
813 		goto err2;
814 	}
815 	for (i = 0; i < adev->usec_timeout; i++) {
816 		tmp = RREG32(scratch);
817 		if (tmp == 0xDEADBEEF)
818 			break;
819 		DRM_UDELAY(1);
820 	}
821 	if (i < adev->usec_timeout) {
822 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
823 			 ring->idx, i);
824 		goto err2;
825 	} else {
826 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
827 			  scratch, tmp);
828 		r = -EINVAL;
829 	}
830 err2:
831 	fence_put(f);
832 	amdgpu_ib_free(adev, &ib, NULL);
833 	fence_put(f);
834 err1:
835 	amdgpu_gfx_scratch_free(adev, scratch);
836 	return r;
837 }
838 
839 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
840 {
841 	const char *chip_name;
842 	char fw_name[30];
843 	int err;
844 	struct amdgpu_firmware_info *info = NULL;
845 	const struct common_firmware_header *header = NULL;
846 	const struct gfx_firmware_header_v1_0 *cp_hdr;
847 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
848 	unsigned int *tmp = NULL, i;
849 
850 	DRM_DEBUG("\n");
851 
852 	switch (adev->asic_type) {
853 	case CHIP_TOPAZ:
854 		chip_name = "topaz";
855 		break;
856 	case CHIP_TONGA:
857 		chip_name = "tonga";
858 		break;
859 	case CHIP_CARRIZO:
860 		chip_name = "carrizo";
861 		break;
862 	case CHIP_FIJI:
863 		chip_name = "fiji";
864 		break;
865 	case CHIP_POLARIS11:
866 		chip_name = "polaris11";
867 		break;
868 	case CHIP_POLARIS10:
869 		chip_name = "polaris10";
870 		break;
871 	case CHIP_STONEY:
872 		chip_name = "stoney";
873 		break;
874 	default:
875 		BUG();
876 	}
877 
878 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
879 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
880 	if (err)
881 		goto out;
882 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
883 	if (err)
884 		goto out;
885 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
886 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
887 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
888 
889 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
890 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
891 	if (err)
892 		goto out;
893 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
894 	if (err)
895 		goto out;
896 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
897 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
898 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
899 
900 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
901 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
902 	if (err)
903 		goto out;
904 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
905 	if (err)
906 		goto out;
907 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
908 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
909 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
910 
911 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
912 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
913 	if (err)
914 		goto out;
915 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
916 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
917 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
918 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
919 
920 	adev->gfx.rlc.save_and_restore_offset =
921 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
922 	adev->gfx.rlc.clear_state_descriptor_offset =
923 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
924 	adev->gfx.rlc.avail_scratch_ram_locations =
925 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
926 	adev->gfx.rlc.reg_restore_list_size =
927 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
928 	adev->gfx.rlc.reg_list_format_start =
929 			le32_to_cpu(rlc_hdr->reg_list_format_start);
930 	adev->gfx.rlc.reg_list_format_separate_start =
931 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
932 	adev->gfx.rlc.starting_offsets_start =
933 			le32_to_cpu(rlc_hdr->starting_offsets_start);
934 	adev->gfx.rlc.reg_list_format_size_bytes =
935 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
936 	adev->gfx.rlc.reg_list_size_bytes =
937 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
938 
939 	adev->gfx.rlc.register_list_format =
940 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
941 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
942 
943 	if (!adev->gfx.rlc.register_list_format) {
944 		err = -ENOMEM;
945 		goto out;
946 	}
947 
948 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
949 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
950 	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
951 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
952 
953 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
954 
955 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
956 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
957 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
958 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
959 
960 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
961 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
962 	if (err)
963 		goto out;
964 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
965 	if (err)
966 		goto out;
967 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
968 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
969 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
970 
971 	if ((adev->asic_type != CHIP_STONEY) &&
972 	    (adev->asic_type != CHIP_TOPAZ)) {
973 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
974 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
975 		if (!err) {
976 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
977 			if (err)
978 				goto out;
979 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
980 				adev->gfx.mec2_fw->data;
981 			adev->gfx.mec2_fw_version =
982 				le32_to_cpu(cp_hdr->header.ucode_version);
983 			adev->gfx.mec2_feature_version =
984 				le32_to_cpu(cp_hdr->ucode_feature_version);
985 		} else {
986 			err = 0;
987 			adev->gfx.mec2_fw = NULL;
988 		}
989 	}
990 
991 	if (adev->firmware.smu_load) {
992 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
993 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
994 		info->fw = adev->gfx.pfp_fw;
995 		header = (const struct common_firmware_header *)info->fw->data;
996 		adev->firmware.fw_size +=
997 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
998 
999 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1000 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1001 		info->fw = adev->gfx.me_fw;
1002 		header = (const struct common_firmware_header *)info->fw->data;
1003 		adev->firmware.fw_size +=
1004 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1005 
1006 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1007 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1008 		info->fw = adev->gfx.ce_fw;
1009 		header = (const struct common_firmware_header *)info->fw->data;
1010 		adev->firmware.fw_size +=
1011 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1012 
1013 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1014 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1015 		info->fw = adev->gfx.rlc_fw;
1016 		header = (const struct common_firmware_header *)info->fw->data;
1017 		adev->firmware.fw_size +=
1018 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1019 
1020 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1021 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1022 		info->fw = adev->gfx.mec_fw;
1023 		header = (const struct common_firmware_header *)info->fw->data;
1024 		adev->firmware.fw_size +=
1025 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1026 
1027 		if (adev->gfx.mec2_fw) {
1028 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1029 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1030 			info->fw = adev->gfx.mec2_fw;
1031 			header = (const struct common_firmware_header *)info->fw->data;
1032 			adev->firmware.fw_size +=
1033 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1034 		}
1035 
1036 	}
1037 
1038 out:
1039 	if (err) {
1040 		dev_err(adev->dev,
1041 			"gfx8: Failed to load firmware \"%s\"\n",
1042 			fw_name);
1043 		release_firmware(adev->gfx.pfp_fw);
1044 		adev->gfx.pfp_fw = NULL;
1045 		release_firmware(adev->gfx.me_fw);
1046 		adev->gfx.me_fw = NULL;
1047 		release_firmware(adev->gfx.ce_fw);
1048 		adev->gfx.ce_fw = NULL;
1049 		release_firmware(adev->gfx.rlc_fw);
1050 		adev->gfx.rlc_fw = NULL;
1051 		release_firmware(adev->gfx.mec_fw);
1052 		adev->gfx.mec_fw = NULL;
1053 		release_firmware(adev->gfx.mec2_fw);
1054 		adev->gfx.mec2_fw = NULL;
1055 	}
1056 	return err;
1057 }
1058 
/*
 * gfx_v8_0_get_csb_buffer - fill the RLC clear state buffer
 *
 * Emits a PM4 command stream into @buffer: preamble-begin, a context
 * control packet, the SECT_CONTEXT register extents from the ASIC's
 * clear state data, a per-ASIC PA_SC_RASTER_CONFIG value pair,
 * preamble-end and a trailing CLEAR_STATE packet.  All dwords are
 * stored little-endian.  @buffer must be at least
 * gfx_v8_0_get_csb_size() dwords — the caller sizes the BO with it.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* nothing to emit without clear state data or a destination */
	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* copy each SECT_CONTEXT extent as one SET_CONTEXT_REG packet;
	 * any other section id aborts the build (buffer left truncated) */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-ASIC raster configuration (two consecutive context regs
	 * starting at mmPA_SC_RASTER_CONFIG) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1131 
1132 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1133 {
1134 	int r;
1135 
1136 	/* clear state block */
1137 	if (adev->gfx.rlc.clear_state_obj) {
1138 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1139 		if (unlikely(r != 0))
1140 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1141 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1142 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1143 
1144 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1145 		adev->gfx.rlc.clear_state_obj = NULL;
1146 	}
1147 }
1148 
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC clear state buffer
 *
 * Creates (if necessary) a CPU-accessible VRAM buffer object sized by
 * gfx_v8_0_get_csb_size(), pins it, maps it and writes the clear state
 * command stream into it via gfx_v8_0_get_csb_buffer().  The BO stays
 * pinned; any failure tears everything down via gfx_v8_0_rlc_fini().
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		/* reserve -> pin -> kmap; each step cleans up on failure */
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	return 0;
}
1205 
1206 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1207 {
1208 	int r;
1209 
1210 	if (adev->gfx.mec.hpd_eop_obj) {
1211 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1212 		if (unlikely(r != 0))
1213 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1214 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1215 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1216 
1217 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1218 		adev->gfx.mec.hpd_eop_obj = NULL;
1219 	}
1220 }
1221 
1222 #define MEC_HPD_SIZE 2048
1223 
1224 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1225 {
1226 	int r;
1227 	u32 *hpd;
1228 
1229 	/*
1230 	 * we assign only 1 pipe because all other pipes will
1231 	 * be handled by KFD
1232 	 */
1233 	adev->gfx.mec.num_mec = 1;
1234 	adev->gfx.mec.num_pipe = 1;
1235 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1236 
1237 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1238 		r = amdgpu_bo_create(adev,
1239 				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1240 				     PAGE_SIZE, true,
1241 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1242 				     &adev->gfx.mec.hpd_eop_obj);
1243 		if (r) {
1244 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1245 			return r;
1246 		}
1247 	}
1248 
1249 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1250 	if (unlikely(r != 0)) {
1251 		gfx_v8_0_mec_fini(adev);
1252 		return r;
1253 	}
1254 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1255 			  &adev->gfx.mec.hpd_eop_gpu_addr);
1256 	if (r) {
1257 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1258 		gfx_v8_0_mec_fini(adev);
1259 		return r;
1260 	}
1261 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1262 	if (r) {
1263 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1264 		gfx_v8_0_mec_fini(adev);
1265 		return r;
1266 	}
1267 
1268 	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1269 
1270 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1271 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1272 
1273 	return 0;
1274 }
1275 
/* Raw GCN shader machine code copied verbatim into the workaround IB by
 * gfx_v8_0_do_edc_gpr_workarounds() for the VGPR-init dispatch.
 * NOTE(review): presumably a sequence of v_mov_b32 writes cycling
 * through the VGPRs followed by a barrier/endpgm — confirm against a
 * GCN3 disassembler before relying on that reading. */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1312 
/* Raw GCN shader machine code copied verbatim into the workaround IB by
 * gfx_v8_0_do_edc_gpr_workarounds(); shared by both SGPR-init
 * dispatches (sgpr1/sgpr2 differ only in their register state). */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1337 
/* Register/value pairs emitted as SET_SH_REG packets (two entries per
 * packet) by gfx_v8_0_do_edc_gpr_workarounds() before the VGPR-init
 * dispatch.  STATIC_THREAD_MGMT_SE0 = 0xffffffff enables all CUs. */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1357 
/* Register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); CU mask 0x0f on SE0 (the second
 * dispatch covers the complementary 0xf0 mask). */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1377 
/* Register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); CU mask 0xf0 on SE0 — the
 * complement of sgpr1_init_regs, otherwise identical. */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1397 
/* EDC SEC/DED error counter registers; read back once at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear their counts. */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1426 
1427 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1428 {
1429 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1430 	struct amdgpu_ib ib;
1431 	struct fence *f = NULL;
1432 	int r, i;
1433 	u32 tmp;
1434 	unsigned total_size, vgpr_offset, sgpr_offset;
1435 	u64 gpu_addr;
1436 
1437 	/* only supported on CZ */
1438 	if (adev->asic_type != CHIP_CARRIZO)
1439 		return 0;
1440 
1441 	/* bail if the compute ring is not ready */
1442 	if (!ring->ready)
1443 		return 0;
1444 
1445 	tmp = RREG32(mmGB_EDC_MODE);
1446 	WREG32(mmGB_EDC_MODE, 0);
1447 
1448 	total_size =
1449 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1450 	total_size +=
1451 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1452 	total_size +=
1453 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1454 	total_size = ALIGN(total_size, 256);
1455 	vgpr_offset = total_size;
1456 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1457 	sgpr_offset = total_size;
1458 	total_size += sizeof(sgpr_init_compute_shader);
1459 
1460 	/* allocate an indirect buffer to put the commands in */
1461 	memset(&ib, 0, sizeof(ib));
1462 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1463 	if (r) {
1464 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1465 		return r;
1466 	}
1467 
1468 	/* load the compute shaders */
1469 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1470 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1471 
1472 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1473 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1474 
1475 	/* init the ib length to 0 */
1476 	ib.length_dw = 0;
1477 
1478 	/* VGPR */
1479 	/* write the register state for the compute dispatch */
1480 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1481 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1482 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1483 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1484 	}
1485 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1486 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1487 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1488 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1489 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1490 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1491 
1492 	/* write dispatch packet */
1493 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1494 	ib.ptr[ib.length_dw++] = 8; /* x */
1495 	ib.ptr[ib.length_dw++] = 1; /* y */
1496 	ib.ptr[ib.length_dw++] = 1; /* z */
1497 	ib.ptr[ib.length_dw++] =
1498 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1499 
1500 	/* write CS partial flush packet */
1501 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1502 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1503 
1504 	/* SGPR1 */
1505 	/* write the register state for the compute dispatch */
1506 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1507 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1508 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1509 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1510 	}
1511 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1512 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1513 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1514 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1515 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1516 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1517 
1518 	/* write dispatch packet */
1519 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1520 	ib.ptr[ib.length_dw++] = 8; /* x */
1521 	ib.ptr[ib.length_dw++] = 1; /* y */
1522 	ib.ptr[ib.length_dw++] = 1; /* z */
1523 	ib.ptr[ib.length_dw++] =
1524 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1525 
1526 	/* write CS partial flush packet */
1527 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1528 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1529 
1530 	/* SGPR2 */
1531 	/* write the register state for the compute dispatch */
1532 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1533 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1534 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1535 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1536 	}
1537 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1538 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1539 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1540 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1541 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1542 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1543 
1544 	/* write dispatch packet */
1545 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1546 	ib.ptr[ib.length_dw++] = 8; /* x */
1547 	ib.ptr[ib.length_dw++] = 1; /* y */
1548 	ib.ptr[ib.length_dw++] = 1; /* z */
1549 	ib.ptr[ib.length_dw++] =
1550 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1551 
1552 	/* write CS partial flush packet */
1553 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1554 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1555 
1556 	/* shedule the ib on the ring */
1557 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1558 	if (r) {
1559 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1560 		goto fail;
1561 	}
1562 
1563 	/* wait for the GPU to finish processing the IB */
1564 	r = fence_wait(f, false);
1565 	if (r) {
1566 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1567 		goto fail;
1568 	}
1569 
1570 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1571 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1572 	WREG32(mmGB_EDC_MODE, tmp);
1573 
1574 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1575 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1576 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1577 
1578 
1579 	/* read back registers to clear the counters */
1580 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1581 		RREG32(sec_ded_counter_registers[i]);
1582 
1583 fail:
1584 	fence_put(f);
1585 	amdgpu_ib_free(adev, &ib, NULL);
1586 	fence_put(f);
1587 
1588 	return r;
1589 }
1590 
1591 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1592 {
1593 	u32 gb_addr_config;
1594 	u32 mc_shared_chmap, mc_arb_ramcfg;
1595 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1596 	u32 tmp;
1597 	int ret;
1598 
1599 	switch (adev->asic_type) {
1600 	case CHIP_TOPAZ:
1601 		adev->gfx.config.max_shader_engines = 1;
1602 		adev->gfx.config.max_tile_pipes = 2;
1603 		adev->gfx.config.max_cu_per_sh = 6;
1604 		adev->gfx.config.max_sh_per_se = 1;
1605 		adev->gfx.config.max_backends_per_se = 2;
1606 		adev->gfx.config.max_texture_channel_caches = 2;
1607 		adev->gfx.config.max_gprs = 256;
1608 		adev->gfx.config.max_gs_threads = 32;
1609 		adev->gfx.config.max_hw_contexts = 8;
1610 
1611 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1612 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1613 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1614 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1615 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1616 		break;
1617 	case CHIP_FIJI:
1618 		adev->gfx.config.max_shader_engines = 4;
1619 		adev->gfx.config.max_tile_pipes = 16;
1620 		adev->gfx.config.max_cu_per_sh = 16;
1621 		adev->gfx.config.max_sh_per_se = 1;
1622 		adev->gfx.config.max_backends_per_se = 4;
1623 		adev->gfx.config.max_texture_channel_caches = 16;
1624 		adev->gfx.config.max_gprs = 256;
1625 		adev->gfx.config.max_gs_threads = 32;
1626 		adev->gfx.config.max_hw_contexts = 8;
1627 
1628 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1629 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1630 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1631 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1632 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1633 		break;
1634 	case CHIP_POLARIS11:
1635 		ret = amdgpu_atombios_get_gfx_info(adev);
1636 		if (ret)
1637 			return ret;
1638 		adev->gfx.config.max_gprs = 256;
1639 		adev->gfx.config.max_gs_threads = 32;
1640 		adev->gfx.config.max_hw_contexts = 8;
1641 
1642 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1643 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1644 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1645 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1646 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1647 		break;
1648 	case CHIP_POLARIS10:
1649 		ret = amdgpu_atombios_get_gfx_info(adev);
1650 		if (ret)
1651 			return ret;
1652 		adev->gfx.config.max_gprs = 256;
1653 		adev->gfx.config.max_gs_threads = 32;
1654 		adev->gfx.config.max_hw_contexts = 8;
1655 
1656 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1657 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1658 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1659 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1660 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1661 		break;
1662 	case CHIP_TONGA:
1663 		adev->gfx.config.max_shader_engines = 4;
1664 		adev->gfx.config.max_tile_pipes = 8;
1665 		adev->gfx.config.max_cu_per_sh = 8;
1666 		adev->gfx.config.max_sh_per_se = 1;
1667 		adev->gfx.config.max_backends_per_se = 2;
1668 		adev->gfx.config.max_texture_channel_caches = 8;
1669 		adev->gfx.config.max_gprs = 256;
1670 		adev->gfx.config.max_gs_threads = 32;
1671 		adev->gfx.config.max_hw_contexts = 8;
1672 
1673 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1674 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1675 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1676 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1677 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1678 		break;
1679 	case CHIP_CARRIZO:
1680 		adev->gfx.config.max_shader_engines = 1;
1681 		adev->gfx.config.max_tile_pipes = 2;
1682 		adev->gfx.config.max_sh_per_se = 1;
1683 		adev->gfx.config.max_backends_per_se = 2;
1684 
1685 		switch (adev->pdev->revision) {
1686 		case 0xc4:
1687 		case 0x84:
1688 		case 0xc8:
1689 		case 0xcc:
1690 		case 0xe1:
1691 		case 0xe3:
1692 			/* B10 */
1693 			adev->gfx.config.max_cu_per_sh = 8;
1694 			break;
1695 		case 0xc5:
1696 		case 0x81:
1697 		case 0x85:
1698 		case 0xc9:
1699 		case 0xcd:
1700 		case 0xe2:
1701 		case 0xe4:
1702 			/* B8 */
1703 			adev->gfx.config.max_cu_per_sh = 6;
1704 			break;
1705 		case 0xc6:
1706 		case 0xca:
1707 		case 0xce:
1708 		case 0x88:
1709 			/* B6 */
1710 			adev->gfx.config.max_cu_per_sh = 6;
1711 			break;
1712 		case 0xc7:
1713 		case 0x87:
1714 		case 0xcb:
1715 		case 0xe5:
1716 		case 0x89:
1717 		default:
1718 			/* B4 */
1719 			adev->gfx.config.max_cu_per_sh = 4;
1720 			break;
1721 		}
1722 
1723 		adev->gfx.config.max_texture_channel_caches = 2;
1724 		adev->gfx.config.max_gprs = 256;
1725 		adev->gfx.config.max_gs_threads = 32;
1726 		adev->gfx.config.max_hw_contexts = 8;
1727 
1728 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1729 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1730 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1731 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1732 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1733 		break;
1734 	case CHIP_STONEY:
1735 		adev->gfx.config.max_shader_engines = 1;
1736 		adev->gfx.config.max_tile_pipes = 2;
1737 		adev->gfx.config.max_sh_per_se = 1;
1738 		adev->gfx.config.max_backends_per_se = 1;
1739 
1740 		switch (adev->pdev->revision) {
1741 		case 0xc0:
1742 		case 0xc1:
1743 		case 0xc2:
1744 		case 0xc4:
1745 		case 0xc8:
1746 		case 0xc9:
1747 			adev->gfx.config.max_cu_per_sh = 3;
1748 			break;
1749 		case 0xd0:
1750 		case 0xd1:
1751 		case 0xd2:
1752 		default:
1753 			adev->gfx.config.max_cu_per_sh = 2;
1754 			break;
1755 		}
1756 
1757 		adev->gfx.config.max_texture_channel_caches = 2;
1758 		adev->gfx.config.max_gprs = 256;
1759 		adev->gfx.config.max_gs_threads = 16;
1760 		adev->gfx.config.max_hw_contexts = 8;
1761 
1762 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1763 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1764 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1765 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1766 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1767 		break;
1768 	default:
1769 		adev->gfx.config.max_shader_engines = 2;
1770 		adev->gfx.config.max_tile_pipes = 4;
1771 		adev->gfx.config.max_cu_per_sh = 2;
1772 		adev->gfx.config.max_sh_per_se = 1;
1773 		adev->gfx.config.max_backends_per_se = 2;
1774 		adev->gfx.config.max_texture_channel_caches = 4;
1775 		adev->gfx.config.max_gprs = 256;
1776 		adev->gfx.config.max_gs_threads = 32;
1777 		adev->gfx.config.max_hw_contexts = 8;
1778 
1779 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1780 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1781 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1782 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1783 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1784 		break;
1785 	}
1786 
1787 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1788 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1789 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1790 
1791 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1792 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1793 	if (adev->flags & AMD_IS_APU) {
1794 		/* Get memory bank mapping mode. */
1795 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1796 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1797 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1798 
1799 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1800 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1801 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1802 
1803 		/* Validate settings in case only one DIMM installed. */
1804 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1805 			dimm00_addr_map = 0;
1806 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1807 			dimm01_addr_map = 0;
1808 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1809 			dimm10_addr_map = 0;
1810 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1811 			dimm11_addr_map = 0;
1812 
1813 		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1814 		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1815 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1816 			adev->gfx.config.mem_row_size_in_kb = 2;
1817 		else
1818 			adev->gfx.config.mem_row_size_in_kb = 1;
1819 	} else {
1820 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1821 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1822 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1823 			adev->gfx.config.mem_row_size_in_kb = 4;
1824 	}
1825 
1826 	adev->gfx.config.shader_engine_tile_size = 32;
1827 	adev->gfx.config.num_gpus = 1;
1828 	adev->gfx.config.multi_gpu_tile_size = 64;
1829 
1830 	/* fix up row size */
1831 	switch (adev->gfx.config.mem_row_size_in_kb) {
1832 	case 1:
1833 	default:
1834 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1835 		break;
1836 	case 2:
1837 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1838 		break;
1839 	case 4:
1840 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1841 		break;
1842 	}
1843 	adev->gfx.config.gb_addr_config = gb_addr_config;
1844 
1845 	return 0;
1846 }
1847 
/**
 * gfx_v8_0_sw_init - software-side initialization of the GFX v8 IP block
 * @handle: amdgpu device pointer (passed as void * by the IP block framework)
 *
 * Registers the EOP / privileged-register / privileged-instruction interrupt
 * sources, loads the gfx microcode, allocates the RLC and MEC buffer objects,
 * creates the gfx and compute rings, reserves the GDS/GWS/OA buffer objects
 * and finally derives the per-ASIC gfx configuration.
 *
 * Returns 0 on success or a negative error code from the first failing step.
 * NOTE(review): on a mid-sequence failure this returns without unwinding the
 * earlier allocations — presumably the framework's sw_fini path cleans them
 * up; confirm against the IP block teardown flow.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event (interrupt source id 181) */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg access fault (interrupt source id 184) */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged instruction fault (interrupt source id 185) */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland (Topaz) */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues (8 queues per pipe, see /8 and %8 below) */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		/* EOP interrupt line is per pipe, offset from the MEC1 pipe0 base */
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	/* constant engine ram size */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
1967 
1968 static int gfx_v8_0_sw_fini(void *handle)
1969 {
1970 	int i;
1971 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1972 
1973 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1974 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1975 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1976 
1977 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1978 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1979 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1980 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1981 
1982 	gfx_v8_0_mec_fini(adev);
1983 
1984 	gfx_v8_0_rlc_fini(adev);
1985 
1986 	kfree(adev->gfx.rlc.register_list_format);
1987 
1988 	return 0;
1989 }
1990 
1991 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1992 {
1993 	uint32_t *modearray, *mod2array;
1994 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1995 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1996 	u32 reg_offset;
1997 
1998 	modearray = adev->gfx.config.tile_mode_array;
1999 	mod2array = adev->gfx.config.macrotile_mode_array;
2000 
2001 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2002 		modearray[reg_offset] = 0;
2003 
2004 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2005 		mod2array[reg_offset] = 0;
2006 
2007 	switch (adev->asic_type) {
2008 	case CHIP_TOPAZ:
2009 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010 				PIPE_CONFIG(ADDR_SURF_P2) |
2011 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2012 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2013 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2014 				PIPE_CONFIG(ADDR_SURF_P2) |
2015 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2016 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2017 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2018 				PIPE_CONFIG(ADDR_SURF_P2) |
2019 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2020 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2021 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2022 				PIPE_CONFIG(ADDR_SURF_P2) |
2023 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2024 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2025 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2026 				PIPE_CONFIG(ADDR_SURF_P2) |
2027 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2028 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2029 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2030 				PIPE_CONFIG(ADDR_SURF_P2) |
2031 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2032 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2033 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2034 				PIPE_CONFIG(ADDR_SURF_P2) |
2035 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2036 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2037 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2038 				PIPE_CONFIG(ADDR_SURF_P2));
2039 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2040 				PIPE_CONFIG(ADDR_SURF_P2) |
2041 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2042 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2043 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044 				 PIPE_CONFIG(ADDR_SURF_P2) |
2045 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2046 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2047 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2048 				 PIPE_CONFIG(ADDR_SURF_P2) |
2049 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2050 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2051 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052 				 PIPE_CONFIG(ADDR_SURF_P2) |
2053 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2054 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2055 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056 				 PIPE_CONFIG(ADDR_SURF_P2) |
2057 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2058 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2059 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2060 				 PIPE_CONFIG(ADDR_SURF_P2) |
2061 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2062 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2063 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064 				 PIPE_CONFIG(ADDR_SURF_P2) |
2065 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2066 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2067 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2068 				 PIPE_CONFIG(ADDR_SURF_P2) |
2069 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2070 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2071 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2072 				 PIPE_CONFIG(ADDR_SURF_P2) |
2073 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2074 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2075 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2076 				 PIPE_CONFIG(ADDR_SURF_P2) |
2077 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2078 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2079 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2080 				 PIPE_CONFIG(ADDR_SURF_P2) |
2081 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2082 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2083 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2084 				 PIPE_CONFIG(ADDR_SURF_P2) |
2085 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2086 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2087 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2088 				 PIPE_CONFIG(ADDR_SURF_P2) |
2089 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2091 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2092 				 PIPE_CONFIG(ADDR_SURF_P2) |
2093 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2094 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2095 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2096 				 PIPE_CONFIG(ADDR_SURF_P2) |
2097 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2098 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2099 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2100 				 PIPE_CONFIG(ADDR_SURF_P2) |
2101 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2102 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2103 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2104 				 PIPE_CONFIG(ADDR_SURF_P2) |
2105 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2107 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2108 				 PIPE_CONFIG(ADDR_SURF_P2) |
2109 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2110 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2111 
2112 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2113 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2114 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2115 				NUM_BANKS(ADDR_SURF_8_BANK));
2116 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2117 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2118 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2119 				NUM_BANKS(ADDR_SURF_8_BANK));
2120 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2121 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2122 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2123 				NUM_BANKS(ADDR_SURF_8_BANK));
2124 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2125 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2126 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2127 				NUM_BANKS(ADDR_SURF_8_BANK));
2128 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2129 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2130 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2131 				NUM_BANKS(ADDR_SURF_8_BANK));
2132 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2133 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2134 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2135 				NUM_BANKS(ADDR_SURF_8_BANK));
2136 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2137 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2138 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2139 				NUM_BANKS(ADDR_SURF_8_BANK));
2140 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2141 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2142 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2143 				NUM_BANKS(ADDR_SURF_16_BANK));
2144 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2145 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2146 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2147 				NUM_BANKS(ADDR_SURF_16_BANK));
2148 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2149 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2150 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2151 				 NUM_BANKS(ADDR_SURF_16_BANK));
2152 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2153 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2154 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2155 				 NUM_BANKS(ADDR_SURF_16_BANK));
2156 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2158 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2159 				 NUM_BANKS(ADDR_SURF_16_BANK));
2160 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2162 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2163 				 NUM_BANKS(ADDR_SURF_16_BANK));
2164 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2165 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2166 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2167 				 NUM_BANKS(ADDR_SURF_8_BANK));
2168 
2169 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2170 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2171 			    reg_offset != 23)
2172 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2173 
2174 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2175 			if (reg_offset != 7)
2176 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2177 
2178 		break;
2179 	case CHIP_FIJI:
2180 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2182 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2183 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2184 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2185 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2186 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2187 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2188 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2189 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2190 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2191 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2192 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2193 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2194 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2195 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2196 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2197 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2198 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2199 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2200 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2201 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2202 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2203 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2204 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2205 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2206 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2207 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2208 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2209 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2210 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2211 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2212 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2213 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2214 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2215 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2216 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2217 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2218 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2220 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2221 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2222 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2223 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2224 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2225 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2226 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2227 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2228 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2229 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2230 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2231 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2232 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2233 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2234 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2235 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2236 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2237 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2238 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2239 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2240 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2241 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2243 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2244 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2245 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2246 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2248 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2249 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2251 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2252 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2253 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2254 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2255 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2257 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2258 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2259 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2261 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2262 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2263 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2264 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2265 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2266 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2267 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2268 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2269 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2270 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2271 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2272 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2273 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2274 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2275 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2278 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2279 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2281 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2282 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2283 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2285 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2287 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2289 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2293 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2294 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2295 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2297 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2298 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2299 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2300 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2301 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2302 
2303 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2305 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306 				NUM_BANKS(ADDR_SURF_8_BANK));
2307 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2309 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2310 				NUM_BANKS(ADDR_SURF_8_BANK));
2311 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2313 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2314 				NUM_BANKS(ADDR_SURF_8_BANK));
2315 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2317 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2318 				NUM_BANKS(ADDR_SURF_8_BANK));
2319 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2320 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2321 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2322 				NUM_BANKS(ADDR_SURF_8_BANK));
2323 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2324 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2325 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2326 				NUM_BANKS(ADDR_SURF_8_BANK));
2327 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2329 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2330 				NUM_BANKS(ADDR_SURF_8_BANK));
2331 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2333 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2334 				NUM_BANKS(ADDR_SURF_8_BANK));
2335 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338 				NUM_BANKS(ADDR_SURF_8_BANK));
2339 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2341 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2342 				 NUM_BANKS(ADDR_SURF_8_BANK));
2343 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2346 				 NUM_BANKS(ADDR_SURF_8_BANK));
2347 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2349 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350 				 NUM_BANKS(ADDR_SURF_8_BANK));
2351 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 				 NUM_BANKS(ADDR_SURF_8_BANK));
2355 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2358 				 NUM_BANKS(ADDR_SURF_4_BANK));
2359 
2360 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2361 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2362 
2363 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2364 			if (reg_offset != 7)
2365 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2366 
2367 		break;
	case CHIP_TONGA:
		/*
		 * Tonga tiling configuration.  Each modearray[] entry packs the
		 * array mode, pipe configuration, tile split and/or new micro
		 * tile mode (plus sample split where applicable) for one
		 * GB_TILE_MODE register; the values are written to the hardware
		 * by the WREG32() loops at the end of this case.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE values (bank width/height, macro tile
		 * aspect ratio, bank count).  mod2array[7] is deliberately
		 * never assigned: the write loop below skips register
		 * offset 7.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile mode registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile mode registers, skipping offset 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
		/*
		 * Polaris11 tiling configuration.  Same table layout as the
		 * other VI parts, but every entry uses the P4_16x16 pipe
		 * configuration.  Each modearray[] entry is one GB_TILE_MODE
		 * register value; they are written out by the WREG32() loops
		 * at the end of this case.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE values.  mod2array[7] is deliberately
		 * never assigned: the write loop below skips register
		 * offset 7.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile mode registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile mode registers, skipping offset 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		/*
		 * Polaris10 tiling configuration.  Each modearray[] entry is
		 * one GB_TILE_MODE register value (array mode, pipe config,
		 * tile split and/or micro tile mode, sample split); the PRT
		 * entries at indices 7, 12, 17, 23 and 30 use the narrower
		 * P4_16x16 pipe config.  The values are written to the
		 * hardware by the WREG32() loops at the end of this case.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE values.  mod2array[7] is deliberately
		 * never assigned: the write loop below skips register
		 * offset 7.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile mode registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile mode registers, skipping offset 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2961 	case CHIP_STONEY:
2962 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2963 				PIPE_CONFIG(ADDR_SURF_P2) |
2964 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2965 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2966 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2967 				PIPE_CONFIG(ADDR_SURF_P2) |
2968 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2969 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2970 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2971 				PIPE_CONFIG(ADDR_SURF_P2) |
2972 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2973 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2974 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2975 				PIPE_CONFIG(ADDR_SURF_P2) |
2976 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2977 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2978 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2979 				PIPE_CONFIG(ADDR_SURF_P2) |
2980 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2981 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2982 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2983 				PIPE_CONFIG(ADDR_SURF_P2) |
2984 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2985 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2986 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2987 				PIPE_CONFIG(ADDR_SURF_P2) |
2988 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2989 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2990 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2991 				PIPE_CONFIG(ADDR_SURF_P2));
2992 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2993 				PIPE_CONFIG(ADDR_SURF_P2) |
2994 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2995 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2996 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2997 				 PIPE_CONFIG(ADDR_SURF_P2) |
2998 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2999 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3000 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3001 				 PIPE_CONFIG(ADDR_SURF_P2) |
3002 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3003 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3004 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3005 				 PIPE_CONFIG(ADDR_SURF_P2) |
3006 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3007 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3008 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3009 				 PIPE_CONFIG(ADDR_SURF_P2) |
3010 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3011 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3012 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3013 				 PIPE_CONFIG(ADDR_SURF_P2) |
3014 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3015 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3016 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3017 				 PIPE_CONFIG(ADDR_SURF_P2) |
3018 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3019 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3020 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3021 				 PIPE_CONFIG(ADDR_SURF_P2) |
3022 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3023 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3024 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3025 				 PIPE_CONFIG(ADDR_SURF_P2) |
3026 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3027 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3028 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3029 				 PIPE_CONFIG(ADDR_SURF_P2) |
3030 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3031 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3032 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3033 				 PIPE_CONFIG(ADDR_SURF_P2) |
3034 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3035 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3036 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3037 				 PIPE_CONFIG(ADDR_SURF_P2) |
3038 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3039 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3040 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3041 				 PIPE_CONFIG(ADDR_SURF_P2) |
3042 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3043 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3044 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3045 				 PIPE_CONFIG(ADDR_SURF_P2) |
3046 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3047 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3048 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3049 				 PIPE_CONFIG(ADDR_SURF_P2) |
3050 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3051 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3052 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3053 				 PIPE_CONFIG(ADDR_SURF_P2) |
3054 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3056 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3057 				 PIPE_CONFIG(ADDR_SURF_P2) |
3058 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3059 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3060 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3061 				 PIPE_CONFIG(ADDR_SURF_P2) |
3062 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3063 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3064 
3065 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3067 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068 				NUM_BANKS(ADDR_SURF_8_BANK));
3069 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3071 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3072 				NUM_BANKS(ADDR_SURF_8_BANK));
3073 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076 				NUM_BANKS(ADDR_SURF_8_BANK));
3077 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3078 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3079 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3080 				NUM_BANKS(ADDR_SURF_8_BANK));
3081 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3084 				NUM_BANKS(ADDR_SURF_8_BANK));
3085 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3087 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3088 				NUM_BANKS(ADDR_SURF_8_BANK));
3089 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3092 				NUM_BANKS(ADDR_SURF_8_BANK));
3093 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3094 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3095 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3096 				NUM_BANKS(ADDR_SURF_16_BANK));
3097 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3098 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3099 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3100 				NUM_BANKS(ADDR_SURF_16_BANK));
3101 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3102 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3103 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3104 				 NUM_BANKS(ADDR_SURF_16_BANK));
3105 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3106 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3107 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3108 				 NUM_BANKS(ADDR_SURF_16_BANK));
3109 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3110 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3111 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3112 				 NUM_BANKS(ADDR_SURF_16_BANK));
3113 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3114 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3115 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3116 				 NUM_BANKS(ADDR_SURF_16_BANK));
3117 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3120 				 NUM_BANKS(ADDR_SURF_8_BANK));
3121 
3122 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3123 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3124 			    reg_offset != 23)
3125 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3126 
3127 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3128 			if (reg_offset != 7)
3129 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3130 
3131 		break;
3132 	default:
3133 		dev_warn(adev->dev,
3134 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3135 			 adev->asic_type);
3136 
3137 	case CHIP_CARRIZO:
3138 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3139 				PIPE_CONFIG(ADDR_SURF_P2) |
3140 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3141 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3142 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3143 				PIPE_CONFIG(ADDR_SURF_P2) |
3144 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3145 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3146 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3147 				PIPE_CONFIG(ADDR_SURF_P2) |
3148 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3149 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3150 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3151 				PIPE_CONFIG(ADDR_SURF_P2) |
3152 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3153 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3154 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3155 				PIPE_CONFIG(ADDR_SURF_P2) |
3156 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3157 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3158 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3159 				PIPE_CONFIG(ADDR_SURF_P2) |
3160 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3161 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3162 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3163 				PIPE_CONFIG(ADDR_SURF_P2) |
3164 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3165 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3166 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3167 				PIPE_CONFIG(ADDR_SURF_P2));
3168 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3169 				PIPE_CONFIG(ADDR_SURF_P2) |
3170 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3171 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3172 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3173 				 PIPE_CONFIG(ADDR_SURF_P2) |
3174 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3175 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3176 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3177 				 PIPE_CONFIG(ADDR_SURF_P2) |
3178 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3179 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3180 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3181 				 PIPE_CONFIG(ADDR_SURF_P2) |
3182 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3184 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3185 				 PIPE_CONFIG(ADDR_SURF_P2) |
3186 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3187 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3188 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3189 				 PIPE_CONFIG(ADDR_SURF_P2) |
3190 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3191 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3193 				 PIPE_CONFIG(ADDR_SURF_P2) |
3194 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3195 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3196 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3197 				 PIPE_CONFIG(ADDR_SURF_P2) |
3198 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3199 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3200 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3201 				 PIPE_CONFIG(ADDR_SURF_P2) |
3202 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3203 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3204 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3205 				 PIPE_CONFIG(ADDR_SURF_P2) |
3206 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3207 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3208 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3209 				 PIPE_CONFIG(ADDR_SURF_P2) |
3210 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3211 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3212 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3213 				 PIPE_CONFIG(ADDR_SURF_P2) |
3214 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3215 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3216 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3217 				 PIPE_CONFIG(ADDR_SURF_P2) |
3218 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3219 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3220 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3221 				 PIPE_CONFIG(ADDR_SURF_P2) |
3222 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3223 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3224 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3225 				 PIPE_CONFIG(ADDR_SURF_P2) |
3226 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3227 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3228 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3229 				 PIPE_CONFIG(ADDR_SURF_P2) |
3230 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3231 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3232 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3233 				 PIPE_CONFIG(ADDR_SURF_P2) |
3234 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3235 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3236 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3237 				 PIPE_CONFIG(ADDR_SURF_P2) |
3238 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3239 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3240 
3241 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3242 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3243 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244 				NUM_BANKS(ADDR_SURF_8_BANK));
3245 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3246 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248 				NUM_BANKS(ADDR_SURF_8_BANK));
3249 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3251 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3252 				NUM_BANKS(ADDR_SURF_8_BANK));
3253 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3256 				NUM_BANKS(ADDR_SURF_8_BANK));
3257 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3260 				NUM_BANKS(ADDR_SURF_8_BANK));
3261 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3262 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3263 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3264 				NUM_BANKS(ADDR_SURF_8_BANK));
3265 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3266 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3267 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3268 				NUM_BANKS(ADDR_SURF_8_BANK));
3269 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3270 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3271 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272 				NUM_BANKS(ADDR_SURF_16_BANK));
3273 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3274 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3275 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3276 				NUM_BANKS(ADDR_SURF_16_BANK));
3277 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3278 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3279 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3280 				 NUM_BANKS(ADDR_SURF_16_BANK));
3281 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3282 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3283 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3284 				 NUM_BANKS(ADDR_SURF_16_BANK));
3285 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3286 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3287 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288 				 NUM_BANKS(ADDR_SURF_16_BANK));
3289 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3290 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3291 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3292 				 NUM_BANKS(ADDR_SURF_16_BANK));
3293 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3294 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3295 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3296 				 NUM_BANKS(ADDR_SURF_8_BANK));
3297 
3298 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3299 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3300 			    reg_offset != 23)
3301 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3302 
3303 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3304 			if (reg_offset != 7)
3305 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3306 
3307 		break;
3308 	}
3309 }
3310 
3311 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3312 {
3313 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3314 
3315 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3316 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3317 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3318 	} else if (se_num == 0xffffffff) {
3319 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3320 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3321 	} else if (sh_num == 0xffffffff) {
3322 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3323 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3324 	} else {
3325 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3326 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3327 	}
3328 	WREG32(mmGRBM_GFX_INDEX, data);
3329 }
3330 
3331 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3332 {
3333 	return (u32)((1ULL << bit_width) - 1);
3334 }
3335 
3336 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3337 {
3338 	u32 data, mask;
3339 
3340 	data = RREG32(mmCC_RB_BACKEND_DISABLE);
3341 	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3342 
3343 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3344 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3345 
3346 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3347 				       adev->gfx.config.max_sh_per_se);
3348 
3349 	return (~data) & mask;
3350 }
3351 
3352 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3353 {
3354 	int i, j;
3355 	u32 data;
3356 	u32 active_rbs = 0;
3357 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3358 					adev->gfx.config.max_sh_per_se;
3359 
3360 	mutex_lock(&adev->grbm_idx_mutex);
3361 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3362 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3363 			gfx_v8_0_select_se_sh(adev, i, j);
3364 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3365 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3366 					       rb_bitmap_width_per_sh);
3367 		}
3368 	}
3369 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3370 	mutex_unlock(&adev->grbm_idx_mutex);
3371 
3372 	adev->gfx.config.backend_enable_mask = active_rbs;
3373 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3374 }
3375 
/**
 * gfx_v8_0_init_compute_vmid - init compute VMID apertures
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
3384 #define DEFAULT_SH_MEM_BASES	(0x6000)
3385 #define FIRST_COMPUTE_VMID	(8)
3386 #define LAST_COMPUTE_VMID	(16)
3387 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3388 {
3389 	int i;
3390 	uint32_t sh_mem_config;
3391 	uint32_t sh_mem_bases;
3392 
3393 	/*
3394 	 * Configure apertures:
3395 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3396 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3397 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3398 	 */
3399 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3400 
3401 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3402 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3403 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3404 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3405 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3406 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3407 
3408 	mutex_lock(&adev->srbm_mutex);
3409 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3410 		vi_srbm_select(adev, 0, 0, 0, i);
3411 		/* CP and shaders */
3412 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3413 		WREG32(mmSH_MEM_APE1_BASE, 1);
3414 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3415 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3416 	}
3417 	vi_srbm_select(adev, 0, 0, 0, 0);
3418 	mutex_unlock(&adev->srbm_mutex);
3419 }
3420 
3421 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3422 {
3423 	u32 tmp;
3424 	int i;
3425 
3426 	tmp = RREG32(mmGRBM_CNTL);
3427 	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3428 	WREG32(mmGRBM_CNTL, tmp);
3429 
3430 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3431 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3432 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3433 
3434 	gfx_v8_0_tiling_mode_table_init(adev);
3435 
3436 	gfx_v8_0_setup_rb(adev);
3437 	gfx_v8_0_get_cu_info(adev);
3438 
3439 	/* XXX SH_MEM regs */
3440 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3441 	mutex_lock(&adev->srbm_mutex);
3442 	for (i = 0; i < 16; i++) {
3443 		vi_srbm_select(adev, 0, 0, 0, i);
3444 		/* CP and shaders */
3445 		if (i == 0) {
3446 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3447 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3448 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3449 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3450 			WREG32(mmSH_MEM_CONFIG, tmp);
3451 		} else {
3452 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3453 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3454 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3455 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3456 			WREG32(mmSH_MEM_CONFIG, tmp);
3457 		}
3458 
3459 		WREG32(mmSH_MEM_APE1_BASE, 1);
3460 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3461 		WREG32(mmSH_MEM_BASES, 0);
3462 	}
3463 	vi_srbm_select(adev, 0, 0, 0, 0);
3464 	mutex_unlock(&adev->srbm_mutex);
3465 
3466 	gfx_v8_0_init_compute_vmid(adev);
3467 
3468 	mutex_lock(&adev->grbm_idx_mutex);
3469 	/*
3470 	 * making sure that the following register writes will be broadcasted
3471 	 * to all the shaders
3472 	 */
3473 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3474 
3475 	WREG32(mmPA_SC_FIFO_SIZE,
3476 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3477 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3478 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3479 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3480 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3481 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3482 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3483 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3484 	mutex_unlock(&adev->grbm_idx_mutex);
3485 
3486 }
3487 
3488 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3489 {
3490 	u32 i, j, k;
3491 	u32 mask;
3492 
3493 	mutex_lock(&adev->grbm_idx_mutex);
3494 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3495 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3496 			gfx_v8_0_select_se_sh(adev, i, j);
3497 			for (k = 0; k < adev->usec_timeout; k++) {
3498 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3499 					break;
3500 				udelay(1);
3501 			}
3502 		}
3503 	}
3504 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3505 	mutex_unlock(&adev->grbm_idx_mutex);
3506 
3507 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3508 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3509 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3510 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3511 	for (k = 0; k < adev->usec_timeout; k++) {
3512 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3513 			break;
3514 		udelay(1);
3515 	}
3516 }
3517 
3518 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3519 					       bool enable)
3520 {
3521 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3522 
3523 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3524 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3525 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3526 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3527 
3528 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3529 }
3530 
3531 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3532 {
3533 	/* csib */
3534 	WREG32(mmRLC_CSIB_ADDR_HI,
3535 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3536 	WREG32(mmRLC_CSIB_ADDR_LO,
3537 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3538 	WREG32(mmRLC_CSIB_LENGTH,
3539 			adev->gfx.rlc.clear_state_size);
3540 }
3541 
/*
 * Walk the RLC indirect register-list-format blob starting at ind_offset
 * and rewrite, in place, each referenced register offset into an index
 * into unique_indices[] (collecting the distinct offsets as it goes).
 * Records where each sub-list begins in ind_start_offsets[].
 *
 * NOTE(review): the entry layout is inferred from the walk below — a
 * 0xFFFFFFFF word terminates a sub-list, and `ind_offset += 2` implies
 * the register offset is the third word of each 3-word record; confirm
 * against the RLC firmware format documentation.
 *
 * @register_list_format: blob to rewrite (modified in place)
 * @ind_offset:           starting word index into the blob
 * @list_size:            total size of the blob, in words
 * @unique_indices:       out: distinct register offsets found
 * @indices_count:        in/out: number of entries used in unique_indices
 * @max_indices:          capacity of unique_indices (BUG on overflow)
 * @ind_start_offsets:    out: word index where each sub-list starts
 * @offset_count:         in/out: number of entries used in ind_start_offsets
 * @max_offset:           capacity of ind_start_offsets (BUG on overflow)
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		/* first word after a terminator: record sub-list start */
		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* 0xFFFFFFFF terminates the current sub-list */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the register-offset word of this record */
		ind_offset += 2;

		/* look for a matching index already collected */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append to the unique list */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw offset with its index */
		register_list_format[ind_offset] = indices;
	}
}
3591 
3592 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3593 {
3594 	int i, temp, data;
3595 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3596 	int indices_count = 0;
3597 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3598 	int offset_count = 0;
3599 
3600 	int list_size;
3601 	unsigned int *register_list_format =
3602 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3603 	if (register_list_format == NULL)
3604 		return -ENOMEM;
3605 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3606 			adev->gfx.rlc.reg_list_format_size_bytes);
3607 
3608 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3609 				RLC_FormatDirectRegListLength,
3610 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3611 				unique_indices,
3612 				&indices_count,
3613 				sizeof(unique_indices) / sizeof(int),
3614 				indirect_start_offsets,
3615 				&offset_count,
3616 				sizeof(indirect_start_offsets)/sizeof(int));
3617 
3618 	/* save and restore list */
3619 	temp = RREG32(mmRLC_SRM_CNTL);
3620 	temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3621 	WREG32(mmRLC_SRM_CNTL, temp);
3622 
3623 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3624 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3625 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3626 
3627 	/* indirect list */
3628 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3629 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3630 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3631 
3632 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3633 	list_size = list_size >> 1;
3634 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3635 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3636 
3637 	/* starting offsets starts */
3638 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3639 		adev->gfx.rlc.starting_offsets_start);
3640 	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3641 		WREG32(mmRLC_GPM_SCRATCH_DATA,
3642 				indirect_start_offsets[i]);
3643 
3644 	/* unique indices */
3645 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3646 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3647 	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3648 		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3649 		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3650 	}
3651 	kfree(register_list_format);
3652 
3653 	return 0;
3654 }
3655 
3656 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3657 {
3658 	uint32_t data;
3659 
3660 	data = RREG32(mmRLC_SRM_CNTL);
3661 	data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3662 	WREG32(mmRLC_SRM_CNTL, data);
3663 }
3664 
3665 static void polaris11_init_power_gating(struct amdgpu_device *adev)
3666 {
3667 	uint32_t data;
3668 
3669 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3670 			AMD_PG_SUPPORT_GFX_SMG |
3671 			AMD_PG_SUPPORT_GFX_DMG)) {
3672 		data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3673 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3674 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3675 		WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3676 
3677 		data = 0;
3678 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3679 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3680 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3681 		data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3682 		WREG32(mmRLC_PG_DELAY, data);
3683 
3684 		data = RREG32(mmRLC_PG_DELAY_2);
3685 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3686 		data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3687 		WREG32(mmRLC_PG_DELAY_2, data);
3688 
3689 		data = RREG32(mmRLC_AUTO_PG_CTRL);
3690 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3691 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3692 		WREG32(mmRLC_AUTO_PG_CTRL, data);
3693 	}
3694 }
3695 
3696 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3697 {
3698 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3699 			      AMD_PG_SUPPORT_GFX_SMG |
3700 			      AMD_PG_SUPPORT_GFX_DMG |
3701 			      AMD_PG_SUPPORT_CP |
3702 			      AMD_PG_SUPPORT_GDS |
3703 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3704 		gfx_v8_0_init_csb(adev);
3705 		gfx_v8_0_init_save_restore_list(adev);
3706 		gfx_v8_0_enable_save_restore_machine(adev);
3707 
3708 		if (adev->asic_type == CHIP_POLARIS11)
3709 			polaris11_init_power_gating(adev);
3710 	}
3711 }
3712 
3713 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3714 {
3715 	u32 tmp = RREG32(mmRLC_CNTL);
3716 
3717 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3718 	WREG32(mmRLC_CNTL, tmp);
3719 
3720 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3721 
3722 	gfx_v8_0_wait_for_rlc_serdes(adev);
3723 }
3724 
3725 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3726 {
3727 	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3728 
3729 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3730 	WREG32(mmGRBM_SOFT_RESET, tmp);
3731 	udelay(50);
3732 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3733 	WREG32(mmGRBM_SOFT_RESET, tmp);
3734 	udelay(50);
3735 }
3736 
3737 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3738 {
3739 	u32 tmp = RREG32(mmRLC_CNTL);
3740 
3741 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3742 	WREG32(mmRLC_CNTL, tmp);
3743 
3744 	/* carrizo do enable cp interrupt after cp inited */
3745 	if (!(adev->flags & AMD_IS_APU))
3746 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3747 
3748 	udelay(50);
3749 }
3750 
3751 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3752 {
3753 	const struct rlc_firmware_header_v2_0 *hdr;
3754 	const __le32 *fw_data;
3755 	unsigned i, fw_size;
3756 
3757 	if (!adev->gfx.rlc_fw)
3758 		return -EINVAL;
3759 
3760 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3761 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3762 
3763 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3764 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3765 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3766 
3767 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3768 	for (i = 0; i < fw_size; i++)
3769 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3770 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3771 
3772 	return 0;
3773 }
3774 
3775 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3776 {
3777 	int r;
3778 
3779 	gfx_v8_0_rlc_stop(adev);
3780 
3781 	/* disable CG */
3782 	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3783 	if (adev->asic_type == CHIP_POLARIS11 ||
3784 		adev->asic_type == CHIP_POLARIS10)
3785 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3786 
3787 	/* disable PG */
3788 	WREG32(mmRLC_PG_CNTL, 0);
3789 
3790 	gfx_v8_0_rlc_reset(adev);
3791 
3792 	gfx_v8_0_init_pg(adev);
3793 
3794 	if (!adev->pp_enabled) {
3795 		if (!adev->firmware.smu_load) {
3796 			/* legacy rlc firmware loading */
3797 			r = gfx_v8_0_rlc_load_microcode(adev);
3798 			if (r)
3799 				return r;
3800 		} else {
3801 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3802 							AMDGPU_UCODE_ID_RLC_G);
3803 			if (r)
3804 				return -EINVAL;
3805 		}
3806 	}
3807 
3808 	gfx_v8_0_rlc_start(adev);
3809 
3810 	return 0;
3811 }
3812 
3813 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3814 {
3815 	int i;
3816 	u32 tmp = RREG32(mmCP_ME_CNTL);
3817 
3818 	if (enable) {
3819 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3820 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3821 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3822 	} else {
3823 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3824 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3825 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3826 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3827 			adev->gfx.gfx_ring[i].ready = false;
3828 	}
3829 	WREG32(mmCP_ME_CNTL, tmp);
3830 	udelay(50);
3831 }
3832 
3833 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3834 {
3835 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3836 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3837 	const struct gfx_firmware_header_v1_0 *me_hdr;
3838 	const __le32 *fw_data;
3839 	unsigned i, fw_size;
3840 
3841 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3842 		return -EINVAL;
3843 
3844 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3845 		adev->gfx.pfp_fw->data;
3846 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3847 		adev->gfx.ce_fw->data;
3848 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3849 		adev->gfx.me_fw->data;
3850 
3851 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3852 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3853 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3854 
3855 	gfx_v8_0_cp_gfx_enable(adev, false);
3856 
3857 	/* PFP */
3858 	fw_data = (const __le32 *)
3859 		(adev->gfx.pfp_fw->data +
3860 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3861 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3862 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
3863 	for (i = 0; i < fw_size; i++)
3864 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3865 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3866 
3867 	/* CE */
3868 	fw_data = (const __le32 *)
3869 		(adev->gfx.ce_fw->data +
3870 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3871 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3872 	WREG32(mmCP_CE_UCODE_ADDR, 0);
3873 	for (i = 0; i < fw_size; i++)
3874 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3875 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3876 
3877 	/* ME */
3878 	fw_data = (const __le32 *)
3879 		(adev->gfx.me_fw->data +
3880 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3881 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3882 	WREG32(mmCP_ME_RAM_WADDR, 0);
3883 	for (i = 0; i < fw_size; i++)
3884 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3885 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3886 
3887 	return 0;
3888 }
3889 
3890 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3891 {
3892 	u32 count = 0;
3893 	const struct cs_section_def *sect = NULL;
3894 	const struct cs_extent_def *ext = NULL;
3895 
3896 	/* begin clear state */
3897 	count += 2;
3898 	/* context control state */
3899 	count += 3;
3900 
3901 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3902 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3903 			if (sect->id == SECT_CONTEXT)
3904 				count += 2 + ext->reg_count;
3905 			else
3906 				return 0;
3907 		}
3908 	}
3909 	/* pa_sc_raster_config/pa_sc_raster_config1 */
3910 	count += 4;
3911 	/* end clear state */
3912 	count += 2;
3913 	/* clear state */
3914 	count += 2;
3915 
3916 	return count;
3917 }
3918 
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* reserve room for the clear-state packets plus the 4-dword
	 * SET_BASE for the CE partitions emitted at the end */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent of the golden clear-state table;
	 * dword count here must match gfx_v8_0_get_csb_size() */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		/* unknown VI variant: raster config values are unknown */
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4006 
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Program the gfx ring-buffer registers (CP_RB0_*), optionally set
	 * up doorbells, then start the ring and verify it with a ring test.
	 */

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA allows the rptr to be reset while we do so */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* give the pointer reset time to take before dropping WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* Tonga additionally restricts the valid doorbell range */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
4089 
4090 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4091 {
4092 	int i;
4093 
4094 	if (enable) {
4095 		WREG32(mmCP_MEC_CNTL, 0);
4096 	} else {
4097 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4098 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4099 			adev->gfx.compute_ring[i].ready = false;
4100 	}
4101 	udelay(50);
4102 }
4103 
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	/* Legacy upload of the compute (MEC) microcode. MEC1 is mandatory;
	 * MEC2 is programmed only when a separate image was loaded.
	 */
	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* engines must be halted while their ucode RAM is written */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4149 
/*
 * Memory Queue Descriptor (MQD) for VI (gfx v8) compute queues.
 *
 * One vi_mqd is allocated in GTT per compute ring (see
 * gfx_v8_0_cp_compute_resume) and its address is handed to the hardware
 * via mmCP_MQD_BASE_ADDR/_HI.  Each uint32_t corresponds to one MQD
 * "ordinal" as noted in the trailing comments; the cp_hqd_*/cp_mqd_*
 * fields mirror the CP_HQD_*/CP_MQD_* registers programmed in
 * gfx_v8_0_cp_compute_resume.  The layout is consumed by the CP
 * firmware, so fields must not be added, removed or reordered.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
4410 
4411 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4412 {
4413 	int i, r;
4414 
4415 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4416 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4417 
4418 		if (ring->mqd_obj) {
4419 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4420 			if (unlikely(r != 0))
4421 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4422 
4423 			amdgpu_bo_unpin(ring->mqd_obj);
4424 			amdgpu_bo_unreserve(ring->mqd_obj);
4425 
4426 			amdgpu_bo_unref(&ring->mqd_obj);
4427 			ring->mqd_obj = NULL;
4428 		}
4429 	}
4430 }
4431 
4432 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4433 {
4434 	int r, i, j;
4435 	u32 tmp;
4436 	bool use_doorbell = true;
4437 	u64 hqd_gpu_addr;
4438 	u64 mqd_gpu_addr;
4439 	u64 eop_gpu_addr;
4440 	u64 wb_gpu_addr;
4441 	u32 *buf;
4442 	struct vi_mqd *mqd;
4443 
4444 	/* init the pipes */
4445 	mutex_lock(&adev->srbm_mutex);
4446 	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4447 		int me = (i < 4) ? 1 : 2;
4448 		int pipe = (i < 4) ? i : (i - 4);
4449 
4450 		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4451 		eop_gpu_addr >>= 8;
4452 
4453 		vi_srbm_select(adev, me, pipe, 0, 0);
4454 
4455 		/* write the EOP addr */
4456 		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4457 		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4458 
4459 		/* set the VMID assigned */
4460 		WREG32(mmCP_HQD_VMID, 0);
4461 
4462 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4463 		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4464 		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4465 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
4466 		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4467 	}
4468 	vi_srbm_select(adev, 0, 0, 0, 0);
4469 	mutex_unlock(&adev->srbm_mutex);
4470 
4471 	/* init the queues.  Just two for now. */
4472 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4473 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4474 
4475 		if (ring->mqd_obj == NULL) {
4476 			r = amdgpu_bo_create(adev,
4477 					     sizeof(struct vi_mqd),
4478 					     PAGE_SIZE, true,
4479 					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4480 					     NULL, &ring->mqd_obj);
4481 			if (r) {
4482 				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4483 				return r;
4484 			}
4485 		}
4486 
4487 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4488 		if (unlikely(r != 0)) {
4489 			gfx_v8_0_cp_compute_fini(adev);
4490 			return r;
4491 		}
4492 		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4493 				  &mqd_gpu_addr);
4494 		if (r) {
4495 			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4496 			gfx_v8_0_cp_compute_fini(adev);
4497 			return r;
4498 		}
4499 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4500 		if (r) {
4501 			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4502 			gfx_v8_0_cp_compute_fini(adev);
4503 			return r;
4504 		}
4505 
4506 		/* init the mqd struct */
4507 		memset(buf, 0, sizeof(struct vi_mqd));
4508 
4509 		mqd = (struct vi_mqd *)buf;
4510 		mqd->header = 0xC0310800;
4511 		mqd->compute_pipelinestat_enable = 0x00000001;
4512 		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4513 		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4514 		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4515 		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4516 		mqd->compute_misc_reserved = 0x00000003;
4517 
4518 		mutex_lock(&adev->srbm_mutex);
4519 		vi_srbm_select(adev, ring->me,
4520 			       ring->pipe,
4521 			       ring->queue, 0);
4522 
4523 		/* disable wptr polling */
4524 		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4525 		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4526 		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4527 
4528 		mqd->cp_hqd_eop_base_addr_lo =
4529 			RREG32(mmCP_HQD_EOP_BASE_ADDR);
4530 		mqd->cp_hqd_eop_base_addr_hi =
4531 			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4532 
4533 		/* enable doorbell? */
4534 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4535 		if (use_doorbell) {
4536 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4537 		} else {
4538 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4539 		}
4540 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4541 		mqd->cp_hqd_pq_doorbell_control = tmp;
4542 
4543 		/* disable the queue if it's active */
4544 		mqd->cp_hqd_dequeue_request = 0;
4545 		mqd->cp_hqd_pq_rptr = 0;
4546 		mqd->cp_hqd_pq_wptr= 0;
4547 		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4548 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4549 			for (j = 0; j < adev->usec_timeout; j++) {
4550 				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4551 					break;
4552 				udelay(1);
4553 			}
4554 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4555 			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4556 			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4557 		}
4558 
4559 		/* set the pointer to the MQD */
4560 		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4561 		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4562 		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4563 		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4564 
4565 		/* set MQD vmid to 0 */
4566 		tmp = RREG32(mmCP_MQD_CONTROL);
4567 		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4568 		WREG32(mmCP_MQD_CONTROL, tmp);
4569 		mqd->cp_mqd_control = tmp;
4570 
4571 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4572 		hqd_gpu_addr = ring->gpu_addr >> 8;
4573 		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4574 		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4575 		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4576 		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4577 
4578 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4579 		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4580 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4581 				    (order_base_2(ring->ring_size / 4) - 1));
4582 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4583 			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4584 #ifdef __BIG_ENDIAN
4585 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4586 #endif
4587 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4588 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4589 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4590 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4591 		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4592 		mqd->cp_hqd_pq_control = tmp;
4593 
4594 		/* set the wb address wether it's enabled or not */
4595 		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4596 		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4597 		mqd->cp_hqd_pq_rptr_report_addr_hi =
4598 			upper_32_bits(wb_gpu_addr) & 0xffff;
4599 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4600 		       mqd->cp_hqd_pq_rptr_report_addr_lo);
4601 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4602 		       mqd->cp_hqd_pq_rptr_report_addr_hi);
4603 
4604 		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4605 		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4606 		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4607 		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4608 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4609 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4610 		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4611 
4612 		/* enable the doorbell if requested */
4613 		if (use_doorbell) {
4614 			if ((adev->asic_type == CHIP_CARRIZO) ||
4615 			    (adev->asic_type == CHIP_FIJI) ||
4616 			    (adev->asic_type == CHIP_STONEY) ||
4617 			    (adev->asic_type == CHIP_POLARIS11) ||
4618 			    (adev->asic_type == CHIP_POLARIS10)) {
4619 				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4620 				       AMDGPU_DOORBELL_KIQ << 2);
4621 				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4622 				       AMDGPU_DOORBELL_MEC_RING7 << 2);
4623 			}
4624 			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4625 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4626 					    DOORBELL_OFFSET, ring->doorbell_index);
4627 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4628 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4629 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4630 			mqd->cp_hqd_pq_doorbell_control = tmp;
4631 
4632 		} else {
4633 			mqd->cp_hqd_pq_doorbell_control = 0;
4634 		}
4635 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4636 		       mqd->cp_hqd_pq_doorbell_control);
4637 
4638 		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4639 		ring->wptr = 0;
4640 		mqd->cp_hqd_pq_wptr = ring->wptr;
4641 		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4642 		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4643 
4644 		/* set the vmid for the queue */
4645 		mqd->cp_hqd_vmid = 0;
4646 		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4647 
4648 		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4649 		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4650 		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4651 		mqd->cp_hqd_persistent_state = tmp;
4652 		if (adev->asic_type == CHIP_STONEY ||
4653 			adev->asic_type == CHIP_POLARIS11 ||
4654 			adev->asic_type == CHIP_POLARIS10) {
4655 			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4656 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4657 			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4658 		}
4659 
4660 		/* activate the queue */
4661 		mqd->cp_hqd_active = 1;
4662 		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4663 
4664 		vi_srbm_select(adev, 0, 0, 0, 0);
4665 		mutex_unlock(&adev->srbm_mutex);
4666 
4667 		amdgpu_bo_kunmap(ring->mqd_obj);
4668 		amdgpu_bo_unreserve(ring->mqd_obj);
4669 	}
4670 
4671 	if (use_doorbell) {
4672 		tmp = RREG32(mmCP_PQ_STATUS);
4673 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4674 		WREG32(mmCP_PQ_STATUS, tmp);
4675 	}
4676 
4677 	gfx_v8_0_cp_compute_enable(adev, true);
4678 
4679 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4680 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4681 
4682 		ring->ready = true;
4683 		r = amdgpu_ring_test_ring(ring);
4684 		if (r)
4685 			ring->ready = false;
4686 	}
4687 
4688 	return 0;
4689 }
4690 
/* gfx_v8_0_cp_resume - bring up the gfx and compute command processors
 *
 * Loads the CP microcode (either directly, or - when the SMU manages
 * firmware - by verifying that the SMU finished loading each image),
 * then resumes the gfx and compute rings and re-enables the GUI idle
 * interrupt.  Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* keep the GUI idle interrupt masked while the CP comes up;
	 * APUs skip this */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU-managed loading: confirm each gfx CP image
			 * (CE, PFP, ME) is in place */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			/* Topaz loads the MEC firmware directly even when
			 * an SMU is present */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
										 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4749 
/* Enable or disable both command processors (gfx first, then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4755 
/* gfx_v8_0_hw_init - hw init callback for the GFX IP block
 *
 * @handle: amdgpu_device pointer (passed as void *)
 *
 * Programs the golden registers, initializes the gfx engine state, and
 * brings up the RLC and then the CP.  Returns 0 on success or the
 * error code from whichever resume step failed.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);

	gfx_v8_0_gpu_init(adev);

	/* the RLC must be running before the CP is resumed */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);
	if (r)
		return r;

	/* every init step succeeded; return 0 explicitly rather than the
	 * stale value of r */
	return 0;
}
4775 
/* hw fini callback: drop the fault interrupts, stop the CP and RLC,
 * free the compute MQD backing store, and ungate the block so a later
 * hw_init starts from a known power state. */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* release the privileged register/instruction fault interrupts */
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4791 
/* suspend callback: suspending the GFX block is identical to shutting
 * it down, so just delegate to hw_fini */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4798 
/* resume callback: resuming the GFX block is identical to a fresh
 * hardware init, so just delegate to hw_init */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4805 
4806 static bool gfx_v8_0_is_idle(void *handle)
4807 {
4808 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4809 
4810 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4811 		return false;
4812 	else
4813 		return true;
4814 }
4815 
/* Poll GRBM_STATUS until the GUI_ACTIVE bit clears, giving up after
 * adev->usec_timeout microseconds.
 * Returns 0 once idle, -ETIMEDOUT otherwise. */
static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read GRBM_STATUS, masked down to the GUI_ACTIVE bit */
		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
4832 
/* Soft-reset the gfx block.  Inspects the GRBM/SRBM status registers
 * to determine which units are hung, stops the RLC and both CPs, and
 * then pulses the corresponding soft-reset bits with the memory
 * controller's gfx clients stalled around the reset. */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		/* a gfx pipeline unit is busy: reset the CP and gfx pipe */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* the CP itself is stuck: also reset GRBM through SRBM */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		if (grbm_soft_reset || srbm_soft_reset) {
			/* stall and clear the memory controller's gfx
			 * clients while the reset is asserted */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		if (grbm_soft_reset) {
			/* assert, hold, then release the GRBM reset bits;
			 * the read-backs post the writes */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			/* same assert/hold/release dance for SRBM */
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		if (grbm_soft_reset || srbm_soft_reset) {
			/* un-stall the memory controller's gfx clients */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}
4935 
4936 /**
4937  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4938  *
4939  * @adev: amdgpu_device pointer
4940  *
4941  * Fetches a GPU clock counter snapshot.
4942  * Returns the 64 bit clock counter snapshot.
4943  */
4944 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4945 {
4946 	uint64_t clock;
4947 
4948 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4949 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4950 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4951 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4952 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4953 	return clock;
4954 }
4955 
/* Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * aperture registers for a GDS switch.  Bases and sizes arrive in
 * bytes and are first scaled down to hardware allocation units. */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* convert byte quantities into hardware units */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: base and size share one register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: build a contiguous mask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5003 
/* early init: record the ring counts and install the ring, irq, GDS
 * and RLC callback tables before any hardware is touched */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5017 
/* late init: enable the privileged register/instruction fault
 * interrupts, run the EDC GPR workarounds, and gate the GFX block. */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5041 
5042 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5043 		bool enable)
5044 {
5045 	uint32_t data, temp;
5046 
5047 	/* Send msg to SMU via Powerplay */
5048 	amdgpu_set_powergating_state(adev,
5049 			AMD_IP_BLOCK_TYPE_SMC,
5050 			enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5051 
5052 	if (enable) {
5053 		/* Enable static MGPG */
5054 		temp = data = RREG32(mmRLC_PG_CNTL);
5055 		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5056 
5057 		if (temp != data)
5058 			WREG32(mmRLC_PG_CNTL, data);
5059 	} else {
5060 		temp = data = RREG32(mmRLC_PG_CNTL);
5061 		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5062 
5063 		if (temp != data)
5064 			WREG32(mmRLC_PG_CNTL, data);
5065 	}
5066 }
5067 
5068 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5069 		bool enable)
5070 {
5071 	uint32_t data, temp;
5072 
5073 	if (enable) {
5074 		/* Enable dynamic MGPG */
5075 		temp = data = RREG32(mmRLC_PG_CNTL);
5076 		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5077 
5078 		if (temp != data)
5079 			WREG32(mmRLC_PG_CNTL, data);
5080 	} else {
5081 		temp = data = RREG32(mmRLC_PG_CNTL);
5082 		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5083 
5084 		if (temp != data)
5085 			WREG32(mmRLC_PG_CNTL, data);
5086 	}
5087 }
5088 
5089 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5090 		bool enable)
5091 {
5092 	uint32_t data, temp;
5093 
5094 	if (enable) {
5095 		/* Enable quick PG */
5096 		temp = data = RREG32(mmRLC_PG_CNTL);
5097 		data |= 0x100000;
5098 
5099 		if (temp != data)
5100 			WREG32(mmRLC_PG_CNTL, data);
5101 	} else {
5102 		temp = data = RREG32(mmRLC_PG_CNTL);
5103 		data &= ~0x100000;
5104 
5105 		if (temp != data)
5106 			WREG32(mmRLC_PG_CNTL, data);
5107 	}
5108 }
5109 
5110 static int gfx_v8_0_set_powergating_state(void *handle,
5111 					  enum amd_powergating_state state)
5112 {
5113 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5114 
5115 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5116 		return 0;
5117 
5118 	switch (adev->asic_type) {
5119 	case CHIP_POLARIS11:
5120 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5121 			polaris11_enable_gfx_static_mg_power_gating(adev,
5122 					state == AMD_PG_STATE_GATE ? true : false);
5123 		else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5124 			polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5125 					state == AMD_PG_STATE_GATE ? true : false);
5126 		else
5127 			polaris11_enable_gfx_quick_mg_power_gating(adev,
5128 					state == AMD_PG_STATE_GATE ? true : false);
5129 		break;
5130 	default:
5131 		break;
5132 	}
5133 
5134 	return 0;
5135 }
5136 
/* Issue a BPM serdes command to every CU.  Broadcasts to all shader
 * engines, arms the CU/non-CU master masks, and then programs
 * RLC_SERDES_WR_CTRL with the command and target BPM register.
 * NOTE(review): the Stoney branch does not clear the BPM_DATA and
 * REG_ADDR fields before the new values are ORed in below - presumably
 * intentional for that ASIC, but worth confirming against the spec. */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
			data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5177 
5178 #define MSG_ENTER_RLC_SAFE_MODE     1
5179 #define MSG_EXIT_RLC_SAFE_MODE      0
5180 
5181 #define RLC_GPR_REG2__REQ_MASK           0x00000001
5182 #define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
5183 #define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5184 
/* Request RLC safe mode on Carrizo-family parts via the RLC_GPR_REG2
 * message interface, then poll until the GFX clock/power status
 * reports ready and the RLC has acknowledged the request. */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* nothing to do while the RLC ucode engine is disabled */
	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	/* the handshake is only needed when some CG or PG feature is on */
	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for GFX clocks and power to report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to clear the REQ bit (ack) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5220 
/* Release RLC safe mode on Carrizo-family parts via the RLC_GPR_REG2
 * message interface. */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to do while the RLC ucode engine is disabled */
	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to clear the REQ bit (ack).
	 * NOTE(review): unlike cz_enter_rlc_safe_mode, this poll runs even
	 * when no message was sent above - presumably harmless since REQ
	 * would already be clear; confirm. */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
			break;
		udelay(1);
	}
}
5246 
/* Request RLC safe mode on Iceland-style parts through the dedicated
 * RLC_SAFE_MODE register, then poll for GFX clock/power readiness and
 * for the CMD bit to clear (ack). */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to do while the RLC ucode engine is disabled */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* MESSAGE=1 requests safe-mode entry */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clocks and power to report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to clear the CMD bit (ack) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5280 
/* Release RLC safe mode on Iceland-style parts through the
 * RLC_SAFE_MODE register (MESSAGE=0 requests exit). */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* nothing to do while the RLC ucode engine is disabled */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to clear the CMD bit (ack).
	 * NOTE(review): this poll runs even when no exit message was sent
	 * above - presumably harmless since CMD would already be clear. */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
			break;
		udelay(1);
	}
}
5305 
/* No-op safe-mode entry for ASICs that need no RLC handshake; only the
 * bookkeeping flag is updated. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5310 
/* No-op safe-mode exit counterpart of gfx_v8_0_nop_enter_rlc_safe_mode. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5315 
/* safe-mode handshake via the RLC_GPR_REG2 message interface */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};

/* safe-mode handshake via the dedicated RLC_SAFE_MODE register */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

/* stub hooks for ASICs that need no safe-mode handshake at all */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5330 
/* Enable or disable medium grain clock gating (MGCG) plus the related
 * light-sleep features (RLC/CP memory LS, CGTS).  The whole sequence
 * runs inside RLC safe mode; the numbered comments follow the required
 * programming order, which must not be rearranged. */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				/* 1 - RLC memory Light sleep */
				temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmRLC_MEM_SLP_CNTL, data);
			}

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				/* 2 - CP memory Light sleep */
				temp = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear the override bits
		 * (APUs keep the GRBM override asserted) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5443 
/* Enable or disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS).  Runs inside RLC safe mode; the numbered
 * comments follow the required programming sequence. */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/* Toggle all gfx clock gating.  Ordering matters: coarse grain must be
 * enabled after - and disabled before - medium grain. */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
5550 
5551 static int gfx_v8_0_set_clockgating_state(void *handle,
5552 					  enum amd_clockgating_state state)
5553 {
5554 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5555 
5556 	switch (adev->asic_type) {
5557 	case CHIP_FIJI:
5558 	case CHIP_CARRIZO:
5559 	case CHIP_STONEY:
5560 		gfx_v8_0_update_gfx_clock_gating(adev,
5561 						 state == AMD_CG_STATE_GATE ? true : false);
5562 		break;
5563 	default:
5564 		break;
5565 	}
5566 	return 0;
5567 }
5568 
5569 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5570 {
5571 	u32 rptr;
5572 
5573 	rptr = ring->adev->wb.wb[ring->rptr_offs];
5574 
5575 	return rptr;
5576 }
5577 
5578 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5579 {
5580 	struct amdgpu_device *adev = ring->adev;
5581 	u32 wptr;
5582 
5583 	if (ring->use_doorbell)
5584 		/* XXX check if swapping is necessary on BE */
5585 		wptr = ring->adev->wb.wb[ring->wptr_offs];
5586 	else
5587 		wptr = RREG32(mmCP_RB0_WPTR);
5588 
5589 	return wptr;
5590 }
5591 
5592 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5593 {
5594 	struct amdgpu_device *adev = ring->adev;
5595 
5596 	if (ring->use_doorbell) {
5597 		/* XXX check if swapping is necessary on BE */
5598 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
5599 		WDOORBELL32(ring->doorbell_index, ring->wptr);
5600 	} else {
5601 		WREG32(mmCP_RB0_WPTR, ring->wptr);
5602 		(void)RREG32(mmCP_RB0_WPTR);
5603 	}
5604 }
5605 
/* Emit a WAIT_REG_MEM packet that requests an HDP flush and polls
 * GPU_HDP_FLUSH_DONE until the flush completes.  The dword sequence
 * below is a fixed PM4 packet layout and must not be reordered.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* each MEC/pipe has its own GPU_HDP_FLUSH_DONE bit;
		 * unknown MEs are silently ignored */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
5637 
/* Emit a WRITE_DATA packet that writes 1 to HDP_DEBUG0, which triggers an
 * HDP invalidate (register write via the ME engine, destination: register).
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
5649 
/* Schedule an indirect buffer (IB) on the GFX ring.
 *
 * First publishes the predicted next read pointer to next_rptr_gpu_addr
 * via WRITE_DATA (next_rptr accounts for the 5-dword write packet, an
 * optional 2-dword SWITCH_BUFFER, and the 4-dword IB packet itself), then
 * optionally emits SWITCH_BUFFER on a context switch, and finally emits
 * the INDIRECT_BUFFER packet (CONST variant for CE IBs).  vm_id is packed
 * into bits 24+ of the control dword.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	if (ctx_switch)
		next_rptr += 2;

	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5689 
/* Schedule an indirect buffer (IB) on a compute ring.
 *
 * Like the GFX variant but without CE/SWITCH_BUFFER handling: publishes
 * the predicted next read pointer via WRITE_DATA, then emits an
 * INDIRECT_BUFFER packet with INDIRECT_BUFFER_VALID set and vm_id in
 * bits 24+ of the control dword.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5719 
/* Emit a fence on the GFX ring via EVENT_WRITE_EOP: flushes TC/TCL1
 * caches and writes seq (32 or 64 bit per flags) to addr, optionally
 * raising an interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
5740 
/* Emit a pipeline synchronization: WAIT_REG_MEM polls the ring's fence
 * address until it equals the latest synced sequence number.  On GFX
 * rings the wait runs on the PFP engine and is followed by two
 * SWITCH_BUFFER packets to keep CE and ME in step.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
5765 
/* Emit a VM TLB flush for vm_id with page directory pd_addr.
 *
 * Sequence: (1) write the page-table base register for the VM context
 * (contexts 0-7 and 8-15 live in separate register ranges), (2) write
 * VM_INVALIDATE_REQUEST with the context's bit set, (3) poll the request
 * register back to zero via WAIT_REG_MEM, and (4) on GFX rings,
 * PFP_SYNC_ME plus two SWITCH_BUFFERs to avoid stale PFP reads.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
5816 
5817 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5818 {
5819 	return ring->adev->wb.wb[ring->rptr_offs];
5820 }
5821 
5822 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5823 {
5824 	return ring->adev->wb.wb[ring->wptr_offs];
5825 }
5826 
/* Publish the compute ring write pointer: update the write-back slot,
 * then ring the doorbell.  Compute rings always use doorbells.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
5835 
/* Emit a fence on a compute ring via RELEASE_MEM: flushes TC/TCL1 caches
 * and writes seq (32 or 64 bit per flags) to addr, optionally raising an
 * interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
5856 
5857 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5858 						 enum amdgpu_interrupt_state state)
5859 {
5860 	u32 cp_int_cntl;
5861 
5862 	switch (state) {
5863 	case AMDGPU_IRQ_STATE_DISABLE:
5864 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5865 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5866 					    TIME_STAMP_INT_ENABLE, 0);
5867 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5868 		break;
5869 	case AMDGPU_IRQ_STATE_ENABLE:
5870 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5871 		cp_int_cntl =
5872 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5873 				      TIME_STAMP_INT_ENABLE, 1);
5874 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5875 		break;
5876 	default:
5877 		break;
5878 	}
5879 }
5880 
5881 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5882 						     int me, int pipe,
5883 						     enum amdgpu_interrupt_state state)
5884 {
5885 	u32 mec_int_cntl, mec_int_cntl_reg;
5886 
5887 	/*
5888 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
5889 	 * handles the setting of interrupts for this specific pipe. All other
5890 	 * pipes' interrupts are set by amdkfd.
5891 	 */
5892 
5893 	if (me == 1) {
5894 		switch (pipe) {
5895 		case 0:
5896 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5897 			break;
5898 		default:
5899 			DRM_DEBUG("invalid pipe %d\n", pipe);
5900 			return;
5901 		}
5902 	} else {
5903 		DRM_DEBUG("invalid me %d\n", me);
5904 		return;
5905 	}
5906 
5907 	switch (state) {
5908 	case AMDGPU_IRQ_STATE_DISABLE:
5909 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5910 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5911 					     TIME_STAMP_INT_ENABLE, 0);
5912 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5913 		break;
5914 	case AMDGPU_IRQ_STATE_ENABLE:
5915 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5916 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5917 					     TIME_STAMP_INT_ENABLE, 1);
5918 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5919 		break;
5920 	default:
5921 		break;
5922 	}
5923 }
5924 
5925 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5926 					     struct amdgpu_irq_src *source,
5927 					     unsigned type,
5928 					     enum amdgpu_interrupt_state state)
5929 {
5930 	u32 cp_int_cntl;
5931 
5932 	switch (state) {
5933 	case AMDGPU_IRQ_STATE_DISABLE:
5934 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5935 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5936 					    PRIV_REG_INT_ENABLE, 0);
5937 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5938 		break;
5939 	case AMDGPU_IRQ_STATE_ENABLE:
5940 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5941 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5942 					    PRIV_REG_INT_ENABLE, 1);
5943 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5944 		break;
5945 	default:
5946 		break;
5947 	}
5948 
5949 	return 0;
5950 }
5951 
5952 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5953 					      struct amdgpu_irq_src *source,
5954 					      unsigned type,
5955 					      enum amdgpu_interrupt_state state)
5956 {
5957 	u32 cp_int_cntl;
5958 
5959 	switch (state) {
5960 	case AMDGPU_IRQ_STATE_DISABLE:
5961 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5962 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5963 					    PRIV_INSTR_INT_ENABLE, 0);
5964 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5965 		break;
5966 	case AMDGPU_IRQ_STATE_ENABLE:
5967 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5968 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5969 					    PRIV_INSTR_INT_ENABLE, 1);
5970 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5971 		break;
5972 	default:
5973 		break;
5974 	}
5975 
5976 	return 0;
5977 }
5978 
5979 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5980 					    struct amdgpu_irq_src *src,
5981 					    unsigned type,
5982 					    enum amdgpu_interrupt_state state)
5983 {
5984 	switch (type) {
5985 	case AMDGPU_CP_IRQ_GFX_EOP:
5986 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
5987 		break;
5988 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5989 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5990 		break;
5991 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5992 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5993 		break;
5994 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5995 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5996 		break;
5997 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5998 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5999 		break;
6000 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6001 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6002 		break;
6003 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6004 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6005 		break;
6006 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6007 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6008 		break;
6009 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6010 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6011 		break;
6012 	default:
6013 		break;
6014 	}
6015 	return 0;
6016 }
6017 
/* End-of-pipe interrupt handler.
 *
 * Decodes me/pipe/queue from entry->ring_id (bits [3:2], [1:0] and [6:4]
 * respectively) and runs fence processing on the matching ring: the
 * single GFX ring for ME 0, or the compute ring whose me/pipe/queue all
 * match for MEs 1 and 2.  Always returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue, so match on the queue as well.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6049 
/* Privileged-register fault handler: log the violation and schedule a
 * GPU reset via the device's reset work item.  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6058 
/* Privileged-instruction fault handler: log the violation and schedule a
 * GPU reset via the device's reset work item.  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6067 
/* IP-block callback table registered with the amdgpu core for the
 * GFX v8 block (init/fini, suspend/resume, idle and gating control).
 */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6084 
/* Ring callback table for the GFX ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6102 
/* Ring callback table for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6120 
6121 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6122 {
6123 	int i;
6124 
6125 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6126 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6127 
6128 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6129 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6130 }
6131 
/* IRQ source callbacks for CP end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6136 
/* IRQ source callbacks for privileged-register fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6141 
/* IRQ source callbacks for privileged-instruction fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6146 
/* Register the GFX interrupt sources (EOP plus the two CP fault types)
 * with the amdgpu IRQ framework.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one EOP type per GFX/compute ring */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6158 
6159 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6160 {
6161 	switch (adev->asic_type) {
6162 	case CHIP_TOPAZ:
6163 	case CHIP_STONEY:
6164 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6165 		break;
6166 	case CHIP_CARRIZO:
6167 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6168 		break;
6169 	default:
6170 		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6171 		break;
6172 	}
6173 }
6174 
/* Initialize GDS/GWS/OA sizes and their gfx vs. compute (CS) partition
 * split.  The GDS memory size is read from hardware; partition sizes
 * depend on whether the ASIC reports 64KB of GDS.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6202 
/* Return a bitmap of active CUs for the currently selected SE/SH.
 *
 * Combines hard-fused and user-disabled CU bits from the two shader
 * array config registers, then inverts and masks down to the number of
 * CUs per SH.  Assumes gfx_v8_0_select_se_sh() was called beforehand.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	/* registers report INACTIVE CUs, so invert to get the active set */
	return (~data) & mask;
}
6217 
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the "always on" CU mask (first two active CUs of
 * each SH).  Iterates all SEs/SHs under grbm_idx_mutex since
 * gfx_v8_0_select_se_sh() changes global GRBM indexing, and restores
 * broadcast (0xffffffff) selection before unlocking.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* scan up to 16 CU bits; first two active CUs per SH
			 * become "always on" */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6254