/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

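/* Firmware images loaded via request_firmware() for each supported ASIC */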
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

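/* Per-VMID GDS register offsets: memory base/size plus GWS and OA registers */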
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

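/*
 * The "golden" register tables below are triplets of {offset, and_mask,
 * or_value}; amdgpu_program_register_sequence() reads each register,
 * clears the and_mask bits and ORs in or_value.
 */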
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);

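/* Apply the per-ASIC golden register settings at init time */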
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

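/* Reserve seven CP scratch registers for the ring and IB tests */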
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
		adev->gfx.scratch.free[i] = true;
		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
	}
}

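/*
 * Ring test: seed a scratch register with 0xCAFEDEAD, ask the CP to write
 * 0xDEADBEEF to it through the ring, then poll until the value shows up.
 */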
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

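/*
 * IB test: same idea as the ring test, but the scratch write is carried
 * in an indirect buffer and completion is detected via its fence.
 */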
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

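/* Drop all CP/RLC firmware references taken by gfx_v8_0_init_microcode() */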
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.smu_load) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the MEC jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

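/* Emit the RLC clear-state indirect buffer built from the cs_data tables */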
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

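/*
 * Concatenate the jump tables embedded in the CE, PFP, ME, MEC (and, on
 * Carrizo, MEC2) images into the RLC CP table buffer, back to back.
 */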
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

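/* Tear down the RLC clear-state and CP jump-table buffer objects */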
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

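/*
 * Allocate, pin and fill the RLC clear-state buffer; Carrizo and Stoney
 * additionally get a CP jump-table buffer.
 */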
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

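/* Free the MEC HPD EOP buffer object */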
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

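/* HPD EOP allocation granularity for MEC queues, in bytes */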
1357 #define MEC_HPD_SIZE 2048
1358 
1359 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1360 {
1361 	int r;
1362 	u32 *hpd;
1363 
1364 	/*
1365 	 * we assign only 1 pipe because all other pipes will
1366 	 * be handled by KFD
1367 	 */
1368 	adev->gfx.mec.num_mec = 1;
1369 	adev->gfx.mec.num_pipe = 1;
1370 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1371 
1372 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1373 		r = amdgpu_bo_create(adev,
1374 				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1375 				     PAGE_SIZE, true,
1376 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1377 				     &adev->gfx.mec.hpd_eop_obj);
1378 		if (r) {
1379 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1380 			return r;
1381 		}
1382 	}
1383 
1384 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1385 	if (unlikely(r != 0)) {
1386 		gfx_v8_0_mec_fini(adev);
1387 		return r;
1388 	}
1389 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1390 			  &adev->gfx.mec.hpd_eop_gpu_addr);
1391 	if (r) {
1392 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1393 		gfx_v8_0_mec_fini(adev);
1394 		return r;
1395 	}
1396 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1397 	if (r) {
1398 		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1399 		gfx_v8_0_mec_fini(adev);
1400 		return r;
1401 	}
1402 
1403 	memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1404 
1405 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1406 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1407 
1408 	return 0;
1409 }
1410 
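/*
 * Raw GCN machine code for the EDC GPR workaround below.  The VGPR
 * shader is a run of v_mov_b32 writes that touches every VGPR and ends
 * with s_barrier/s_endpgm; the SGPR shader that follows does the same
 * for the scalar register file and is dispatched twice with different
 * COMPUTE_STATIC_THREAD_MGMT_SE0 masks.
 */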
1411 static const u32 vgpr_init_compute_shader[] =
1412 {
1413 	0x7e000209, 0x7e020208,
1414 	0x7e040207, 0x7e060206,
1415 	0x7e080205, 0x7e0a0204,
1416 	0x7e0c0203, 0x7e0e0202,
1417 	0x7e100201, 0x7e120200,
1418 	0x7e140209, 0x7e160208,
1419 	0x7e180207, 0x7e1a0206,
1420 	0x7e1c0205, 0x7e1e0204,
1421 	0x7e200203, 0x7e220202,
1422 	0x7e240201, 0x7e260200,
1423 	0x7e280209, 0x7e2a0208,
1424 	0x7e2c0207, 0x7e2e0206,
1425 	0x7e300205, 0x7e320204,
1426 	0x7e340203, 0x7e360202,
1427 	0x7e380201, 0x7e3a0200,
1428 	0x7e3c0209, 0x7e3e0208,
1429 	0x7e400207, 0x7e420206,
1430 	0x7e440205, 0x7e460204,
1431 	0x7e480203, 0x7e4a0202,
1432 	0x7e4c0201, 0x7e4e0200,
1433 	0x7e500209, 0x7e520208,
1434 	0x7e540207, 0x7e560206,
1435 	0x7e580205, 0x7e5a0204,
1436 	0x7e5c0203, 0x7e5e0202,
1437 	0x7e600201, 0x7e620200,
1438 	0x7e640209, 0x7e660208,
1439 	0x7e680207, 0x7e6a0206,
1440 	0x7e6c0205, 0x7e6e0204,
1441 	0x7e700203, 0x7e720202,
1442 	0x7e740201, 0x7e760200,
1443 	0x7e780209, 0x7e7a0208,
1444 	0x7e7c0207, 0x7e7e0206,
1445 	0xbf8a0000, 0xbf810000,
1446 };
1447 
1448 static const u32 sgpr_init_compute_shader[] =
1449 {
1450 	0xbe8a0100, 0xbe8c0102,
1451 	0xbe8e0104, 0xbe900106,
1452 	0xbe920108, 0xbe940100,
1453 	0xbe960102, 0xbe980104,
1454 	0xbe9a0106, 0xbe9c0108,
1455 	0xbe9e0100, 0xbea00102,
1456 	0xbea20104, 0xbea40106,
1457 	0xbea60108, 0xbea80100,
1458 	0xbeaa0102, 0xbeac0104,
1459 	0xbeae0106, 0xbeb00108,
1460 	0xbeb20100, 0xbeb40102,
1461 	0xbeb60104, 0xbeb80106,
1462 	0xbeba0108, 0xbebc0100,
1463 	0xbebe0102, 0xbec00104,
1464 	0xbec20106, 0xbec40108,
1465 	0xbec60100, 0xbec80102,
1466 	0xbee60004, 0xbee70005,
1467 	0xbeea0006, 0xbeeb0007,
1468 	0xbee80008, 0xbee90009,
1469 	0xbefc0000, 0xbf8a0000,
1470 	0xbf810000, 0x00000000,
1471 };
1472 
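/*
 * Register/value pairs, consumed two at a time by
 * gfx_v8_0_do_edc_gpr_workarounds(), describing the compute dispatch
 * state for the VGPR pass and the two SGPR passes.
 */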
1473 static const u32 vgpr_init_regs[] =
1474 {
1475 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1476 	mmCOMPUTE_RESOURCE_LIMITS, 0,
1477 	mmCOMPUTE_NUM_THREAD_X, 256*4,
1478 	mmCOMPUTE_NUM_THREAD_Y, 1,
1479 	mmCOMPUTE_NUM_THREAD_Z, 1,
1480 	mmCOMPUTE_PGM_RSRC2, 20,
1481 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1482 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1483 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1484 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1485 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1486 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1487 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1488 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1489 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1490 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1491 };
1492 
1493 static const u32 sgpr1_init_regs[] =
1494 {
1495 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1496 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1497 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1498 	mmCOMPUTE_NUM_THREAD_Y, 1,
1499 	mmCOMPUTE_NUM_THREAD_Z, 1,
1500 	mmCOMPUTE_PGM_RSRC2, 20,
1501 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1502 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1503 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1504 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1505 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1506 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1507 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1508 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1509 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1510 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1511 };
1512 
1513 static const u32 sgpr2_init_regs[] =
1514 {
1515 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1516 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1517 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1518 	mmCOMPUTE_NUM_THREAD_Y, 1,
1519 	mmCOMPUTE_NUM_THREAD_Z, 1,
1520 	mmCOMPUTE_PGM_RSRC2, 20,
1521 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1522 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1523 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1524 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1525 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1526 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1527 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1528 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1529 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1530 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1531 };
1532 
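/*
 * SEC/DED (single error correct/double error detect) EDC counter
 * registers, read back after the workaround dispatches to clear them.
 */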
1533 static const u32 sec_ded_counter_registers[] =
1534 {
1535 	mmCPC_EDC_ATC_CNT,
1536 	mmCPC_EDC_SCRATCH_CNT,
1537 	mmCPC_EDC_UCODE_CNT,
1538 	mmCPF_EDC_ATC_CNT,
1539 	mmCPF_EDC_ROQ_CNT,
1540 	mmCPF_EDC_TAG_CNT,
1541 	mmCPG_EDC_ATC_CNT,
1542 	mmCPG_EDC_DMA_CNT,
1543 	mmCPG_EDC_TAG_CNT,
1544 	mmDC_EDC_CSINVOC_CNT,
1545 	mmDC_EDC_RESTORE_CNT,
1546 	mmDC_EDC_STATE_CNT,
1547 	mmGDS_EDC_CNT,
1548 	mmGDS_EDC_GRBM_CNT,
1549 	mmGDS_EDC_OA_DED,
1550 	mmSPI_EDC_CNT,
1551 	mmSQC_ATC_EDC_GATCL1_CNT,
1552 	mmSQC_EDC_CNT,
1553 	mmSQ_EDC_DED_CNT,
1554 	mmSQ_EDC_INFO,
1555 	mmSQ_EDC_SEC_CNT,
1556 	mmTCC_EDC_CNT,
1557 	mmTCP_ATC_EDC_GATCL1_CNT,
1558 	mmTCP_EDC_CNT,
1559 	mmTD_EDC_CNT
1560 };
1561 
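/*
 * Dispatch the shaders above so that every VGPR and SGPR holds a known
 * value, then enable EDC reporting and clear the SEC/DED counters.
 */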
1562 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1563 {
1564 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1565 	struct amdgpu_ib ib;
1566 	struct dma_fence *f = NULL;
1567 	int r, i;
1568 	u32 tmp;
1569 	unsigned total_size, vgpr_offset, sgpr_offset;
1570 	u64 gpu_addr;
1571 
1572 	/* only supported on CZ */
1573 	if (adev->asic_type != CHIP_CARRIZO)
1574 		return 0;
1575 
1576 	/* bail if the compute ring is not ready */
1577 	if (!ring->ready)
1578 		return 0;
1579 
1580 	tmp = RREG32(mmGB_EDC_MODE);
1581 	WREG32(mmGB_EDC_MODE, 0);
1582 
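	/*
	 * Per dispatch: 3 dwords for each SET_SH_REG reg/value pair,
	 * 4 dwords to program COMPUTE_PGM_LO/HI, 5 for DISPATCH_DIRECT
	 * and 2 for the CS partial flush EVENT_WRITE, 4 bytes per dword.
	 */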
1583 	total_size =
1584 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1585 	total_size +=
1586 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1587 	total_size +=
1588 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1589 	total_size = ALIGN(total_size, 256);
1590 	vgpr_offset = total_size;
1591 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1592 	sgpr_offset = total_size;
1593 	total_size += sizeof(sgpr_init_compute_shader);
1594 
1595 	/* allocate an indirect buffer to put the commands in */
1596 	memset(&ib, 0, sizeof(ib));
1597 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1598 	if (r) {
1599 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1600 		return r;
1601 	}
1602 
1603 	/* load the compute shaders */
1604 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1605 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1606 
1607 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1608 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1609 
1610 	/* init the ib length to 0 */
1611 	ib.length_dw = 0;
1612 
1613 	/* VGPR */
1614 	/* write the register state for the compute dispatch */
1615 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1616 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1617 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1618 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1619 	}
1620 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1621 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1622 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1623 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1624 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1625 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1626 
1627 	/* write dispatch packet */
1628 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1629 	ib.ptr[ib.length_dw++] = 8; /* x */
1630 	ib.ptr[ib.length_dw++] = 1; /* y */
1631 	ib.ptr[ib.length_dw++] = 1; /* z */
1632 	ib.ptr[ib.length_dw++] =
1633 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1634 
1635 	/* write CS partial flush packet */
1636 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1637 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1638 
1639 	/* SGPR1 */
1640 	/* write the register state for the compute dispatch */
1641 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1642 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1643 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1644 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1645 	}
1646 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1647 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1648 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1649 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1650 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1651 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1652 
1653 	/* write dispatch packet */
1654 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1655 	ib.ptr[ib.length_dw++] = 8; /* x */
1656 	ib.ptr[ib.length_dw++] = 1; /* y */
1657 	ib.ptr[ib.length_dw++] = 1; /* z */
1658 	ib.ptr[ib.length_dw++] =
1659 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1660 
1661 	/* write CS partial flush packet */
1662 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1663 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1664 
1665 	/* SGPR2 */
1666 	/* write the register state for the compute dispatch */
1667 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1668 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1669 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1670 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1671 	}
1672 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1673 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1674 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1675 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1676 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1677 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1678 
1679 	/* write dispatch packet */
1680 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1681 	ib.ptr[ib.length_dw++] = 8; /* x */
1682 	ib.ptr[ib.length_dw++] = 1; /* y */
1683 	ib.ptr[ib.length_dw++] = 1; /* z */
1684 	ib.ptr[ib.length_dw++] =
1685 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1686 
1687 	/* write CS partial flush packet */
1688 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1689 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1690 
1691 	/* schedule the ib on the ring */
1692 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1693 	if (r) {
1694 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1695 		goto fail;
1696 	}
1697 
1698 	/* wait for the GPU to finish processing the IB */
1699 	r = dma_fence_wait(f, false);
1700 	if (r) {
1701 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1702 		goto fail;
1703 	}
1704 
1705 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1706 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1707 	WREG32(mmGB_EDC_MODE, tmp);
1708 
1709 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1710 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1711 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1712 
1713 
1714 	/* read back registers to clear the counters */
1715 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1716 		RREG32(sec_ded_counter_registers[i]);
1717 
1718 fail:
1719 	amdgpu_ib_free(adev, &ib, NULL);
1720 	dma_fence_put(f);
1721 
1722 	return r;
1723 }
1724 
1725 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1726 {
1727 	u32 gb_addr_config;
1728 	u32 mc_shared_chmap, mc_arb_ramcfg;
1729 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1730 	u32 tmp;
1731 	int ret;
1732 
1733 	switch (adev->asic_type) {
1734 	case CHIP_TOPAZ:
1735 		adev->gfx.config.max_shader_engines = 1;
1736 		adev->gfx.config.max_tile_pipes = 2;
1737 		adev->gfx.config.max_cu_per_sh = 6;
1738 		adev->gfx.config.max_sh_per_se = 1;
1739 		adev->gfx.config.max_backends_per_se = 2;
1740 		adev->gfx.config.max_texture_channel_caches = 2;
1741 		adev->gfx.config.max_gprs = 256;
1742 		adev->gfx.config.max_gs_threads = 32;
1743 		adev->gfx.config.max_hw_contexts = 8;
1744 
1745 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1746 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1747 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1748 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1749 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1750 		break;
1751 	case CHIP_FIJI:
1752 		adev->gfx.config.max_shader_engines = 4;
1753 		adev->gfx.config.max_tile_pipes = 16;
1754 		adev->gfx.config.max_cu_per_sh = 16;
1755 		adev->gfx.config.max_sh_per_se = 1;
1756 		adev->gfx.config.max_backends_per_se = 4;
1757 		adev->gfx.config.max_texture_channel_caches = 16;
1758 		adev->gfx.config.max_gprs = 256;
1759 		adev->gfx.config.max_gs_threads = 32;
1760 		adev->gfx.config.max_hw_contexts = 8;
1761 
1762 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1763 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1764 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1765 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1766 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1767 		break;
1768 	case CHIP_POLARIS11:
1769 		ret = amdgpu_atombios_get_gfx_info(adev);
1770 		if (ret)
1771 			return ret;
1772 		adev->gfx.config.max_gprs = 256;
1773 		adev->gfx.config.max_gs_threads = 32;
1774 		adev->gfx.config.max_hw_contexts = 8;
1775 
1776 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1777 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1778 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1779 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1780 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1781 		break;
1782 	case CHIP_POLARIS10:
1783 		ret = amdgpu_atombios_get_gfx_info(adev);
1784 		if (ret)
1785 			return ret;
1786 		adev->gfx.config.max_gprs = 256;
1787 		adev->gfx.config.max_gs_threads = 32;
1788 		adev->gfx.config.max_hw_contexts = 8;
1789 
1790 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1791 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1792 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1793 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1794 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1795 		break;
1796 	case CHIP_TONGA:
1797 		adev->gfx.config.max_shader_engines = 4;
1798 		adev->gfx.config.max_tile_pipes = 8;
1799 		adev->gfx.config.max_cu_per_sh = 8;
1800 		adev->gfx.config.max_sh_per_se = 1;
1801 		adev->gfx.config.max_backends_per_se = 2;
1802 		adev->gfx.config.max_texture_channel_caches = 8;
1803 		adev->gfx.config.max_gprs = 256;
1804 		adev->gfx.config.max_gs_threads = 32;
1805 		adev->gfx.config.max_hw_contexts = 8;
1806 
1807 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1808 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1809 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1810 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1811 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1812 		break;
1813 	case CHIP_CARRIZO:
1814 		adev->gfx.config.max_shader_engines = 1;
1815 		adev->gfx.config.max_tile_pipes = 2;
1816 		adev->gfx.config.max_sh_per_se = 1;
1817 		adev->gfx.config.max_backends_per_se = 2;
1818 
1819 		switch (adev->pdev->revision) {
1820 		case 0xc4:
1821 		case 0x84:
1822 		case 0xc8:
1823 		case 0xcc:
1824 		case 0xe1:
1825 		case 0xe3:
1826 			/* B10 */
1827 			adev->gfx.config.max_cu_per_sh = 8;
1828 			break;
1829 		case 0xc5:
1830 		case 0x81:
1831 		case 0x85:
1832 		case 0xc9:
1833 		case 0xcd:
1834 		case 0xe2:
1835 		case 0xe4:
1836 			/* B8 */
1837 			adev->gfx.config.max_cu_per_sh = 6;
1838 			break;
1839 		case 0xc6:
1840 		case 0xca:
1841 		case 0xce:
1842 		case 0x88:
1843 			/* B6 */
1844 			adev->gfx.config.max_cu_per_sh = 6;
1845 			break;
1846 		case 0xc7:
1847 		case 0x87:
1848 		case 0xcb:
1849 		case 0xe5:
1850 		case 0x89:
1851 		default:
1852 			/* B4 */
1853 			adev->gfx.config.max_cu_per_sh = 4;
1854 			break;
1855 		}
1856 
1857 		adev->gfx.config.max_texture_channel_caches = 2;
1858 		adev->gfx.config.max_gprs = 256;
1859 		adev->gfx.config.max_gs_threads = 32;
1860 		adev->gfx.config.max_hw_contexts = 8;
1861 
1862 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1863 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1864 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1865 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1866 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1867 		break;
1868 	case CHIP_STONEY:
1869 		adev->gfx.config.max_shader_engines = 1;
1870 		adev->gfx.config.max_tile_pipes = 2;
1871 		adev->gfx.config.max_sh_per_se = 1;
1872 		adev->gfx.config.max_backends_per_se = 1;
1873 
1874 		switch (adev->pdev->revision) {
1875 		case 0xc0:
1876 		case 0xc1:
1877 		case 0xc2:
1878 		case 0xc4:
1879 		case 0xc8:
1880 		case 0xc9:
1881 			adev->gfx.config.max_cu_per_sh = 3;
1882 			break;
1883 		case 0xd0:
1884 		case 0xd1:
1885 		case 0xd2:
1886 		default:
1887 			adev->gfx.config.max_cu_per_sh = 2;
1888 			break;
1889 		}
1890 
1891 		adev->gfx.config.max_texture_channel_caches = 2;
1892 		adev->gfx.config.max_gprs = 256;
1893 		adev->gfx.config.max_gs_threads = 16;
1894 		adev->gfx.config.max_hw_contexts = 8;
1895 
1896 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1897 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1898 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1899 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1900 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1901 		break;
1902 	default:
1903 		adev->gfx.config.max_shader_engines = 2;
1904 		adev->gfx.config.max_tile_pipes = 4;
1905 		adev->gfx.config.max_cu_per_sh = 2;
1906 		adev->gfx.config.max_sh_per_se = 1;
1907 		adev->gfx.config.max_backends_per_se = 2;
1908 		adev->gfx.config.max_texture_channel_caches = 4;
1909 		adev->gfx.config.max_gprs = 256;
1910 		adev->gfx.config.max_gs_threads = 32;
1911 		adev->gfx.config.max_hw_contexts = 8;
1912 
1913 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1914 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1915 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1916 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1917 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1918 		break;
1919 	}
1920 
1921 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1922 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1923 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1924 
1925 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1926 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1927 	if (adev->flags & AMD_IS_APU) {
1928 		/* Get memory bank mapping mode. */
1929 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1930 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1931 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1932 
1933 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1934 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1935 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1936 
1937 		/* Validate settings in case only one DIMM is installed. */
1938 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1939 			dimm00_addr_map = 0;
1940 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1941 			dimm01_addr_map = 0;
1942 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1943 			dimm10_addr_map = 0;
1944 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1945 			dimm11_addr_map = 0;
1946 
1947 		/* If the DIMM address map is 8GB, the row size should be 2KB; otherwise 1KB. */
1948 		/* If ROW size(DIMM1) != ROW size(DIMM0), use the larger of the two. */
1949 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1950 			adev->gfx.config.mem_row_size_in_kb = 2;
1951 		else
1952 			adev->gfx.config.mem_row_size_in_kb = 1;
1953 	} else {
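		/* row size = 4 * 2^(8 + NOOFCOLS) bytes, capped at 4KB */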
1954 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1955 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1956 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1957 			adev->gfx.config.mem_row_size_in_kb = 4;
1958 	}
1959 
1960 	adev->gfx.config.shader_engine_tile_size = 32;
1961 	adev->gfx.config.num_gpus = 1;
1962 	adev->gfx.config.multi_gpu_tile_size = 64;
1963 
1964 	/* fix up row size */
1965 	switch (adev->gfx.config.mem_row_size_in_kb) {
1966 	case 1:
1967 	default:
1968 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1969 		break;
1970 	case 2:
1971 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1972 		break;
1973 	case 4:
1974 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1975 		break;
1976 	}
1977 	adev->gfx.config.gb_addr_config = gb_addr_config;
1978 
1979 	return 0;
1980 }
1981 
1982 static int gfx_v8_0_sw_init(void *handle)
1983 {
1984 	int i, r;
1985 	struct amdgpu_ring *ring;
1986 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1987 
1988 	/* EOP Event */
1989 	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1990 	if (r)
1991 		return r;
1992 
1993 	/* Privileged reg */
1994 	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1995 	if (r)
1996 		return r;
1997 
1998 	/* Privileged inst */
1999 	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2000 	if (r)
2001 		return r;
2002 
2003 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2004 
2005 	gfx_v8_0_scratch_init(adev);
2006 
2007 	r = gfx_v8_0_init_microcode(adev);
2008 	if (r) {
2009 		DRM_ERROR("Failed to load gfx firmware!\n");
2010 		return r;
2011 	}
2012 
2013 	r = gfx_v8_0_rlc_init(adev);
2014 	if (r) {
2015 		DRM_ERROR("Failed to init rlc BOs!\n");
2016 		return r;
2017 	}
2018 
2019 	r = gfx_v8_0_mec_init(adev);
2020 	if (r) {
2021 		DRM_ERROR("Failed to init MEC BOs!\n");
2022 		return r;
2023 	}
2024 
2025 	/* set up the gfx ring */
2026 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2027 		ring = &adev->gfx.gfx_ring[i];
2028 		ring->ring_obj = NULL;
2029 		sprintf(ring->name, "gfx");
2030 		/* no gfx doorbells on iceland */
2031 		if (adev->asic_type != CHIP_TOPAZ) {
2032 			ring->use_doorbell = true;
2033 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2034 		}
2035 
2036 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2037 				     AMDGPU_CP_IRQ_GFX_EOP);
2038 		if (r)
2039 			return r;
2040 	}
2041 
2042 	/* set up the compute queues */
2043 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2044 		unsigned irq_type;
2045 
2046 		/* max 32 queues per MEC */
2047 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2048 			DRM_ERROR("Too many (%d) compute rings!\n", i);
2049 			break;
2050 		}
2051 		ring = &adev->gfx.compute_ring[i];
2052 		ring->ring_obj = NULL;
2053 		ring->use_doorbell = true;
2054 		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2055 		ring->me = 1; /* first MEC */
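		/* 8 queues per pipe */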
2056 		ring->pipe = i / 8;
2057 		ring->queue = i % 8;
2058 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2059 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2060 		/* type-2 packets are deprecated on MEC, use type-3 instead */
2061 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2062 				     irq_type);
2063 		if (r)
2064 			return r;
2065 	}
2066 
2067 	/* reserve GDS, GWS and OA resources for gfx */
2068 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2069 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2070 				    &adev->gds.gds_gfx_bo, NULL, NULL);
2071 	if (r)
2072 		return r;
2073 
2074 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2075 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2076 				    &adev->gds.gws_gfx_bo, NULL, NULL);
2077 	if (r)
2078 		return r;
2079 
2080 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2081 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2082 				    &adev->gds.oa_gfx_bo, NULL, NULL);
2083 	if (r)
2084 		return r;
2085 
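	/* 32KB of constant engine (CE) RAM */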
2086 	adev->gfx.ce_ram_size = 0x8000;
2087 
2088 	r = gfx_v8_0_gpu_early_init(adev);
2089 	if (r)
2090 		return r;
2091 
2092 	return 0;
2093 }
2094 
2095 static int gfx_v8_0_sw_fini(void *handle)
2096 {
2097 	int i;
2098 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2099 
2100 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2101 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2102 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2103 
2104 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2105 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2106 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2107 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2108 
2109 	gfx_v8_0_mec_fini(adev);
2110 	gfx_v8_0_rlc_fini(adev);
2111 	gfx_v8_0_free_microcode(adev);
2112 
2113 	return 0;
2114 }
2115 
2116 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2117 {
2118 	uint32_t *modearray, *mod2array;
2119 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2120 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2121 	u32 reg_offset;
2122 
2123 	modearray = adev->gfx.config.tile_mode_array;
2124 	mod2array = adev->gfx.config.macrotile_mode_array;
2125 
2126 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2127 		modearray[reg_offset] = 0;
2128 
2129 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2130 		mod2array[reg_offset] = 0;
2131 
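	/*
	 * Program the per-ASIC GB_TILE_MODE and GB_MACROTILE_MODE tables.
	 * Entries skipped in the write loops below are left untouched and
	 * keep their hardware defaults.
	 */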
2132 	switch (adev->asic_type) {
2133 	case CHIP_TOPAZ:
2134 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135 				PIPE_CONFIG(ADDR_SURF_P2) |
2136 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2137 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139 				PIPE_CONFIG(ADDR_SURF_P2) |
2140 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2141 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2143 				PIPE_CONFIG(ADDR_SURF_P2) |
2144 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2145 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2147 				PIPE_CONFIG(ADDR_SURF_P2) |
2148 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2149 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2150 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151 				PIPE_CONFIG(ADDR_SURF_P2) |
2152 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2153 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2155 				PIPE_CONFIG(ADDR_SURF_P2) |
2156 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2157 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2159 				PIPE_CONFIG(ADDR_SURF_P2) |
2160 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2161 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2163 				PIPE_CONFIG(ADDR_SURF_P2));
2164 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165 				PIPE_CONFIG(ADDR_SURF_P2) |
2166 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2167 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169 				 PIPE_CONFIG(ADDR_SURF_P2) |
2170 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2171 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173 				 PIPE_CONFIG(ADDR_SURF_P2) |
2174 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2175 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2177 				 PIPE_CONFIG(ADDR_SURF_P2) |
2178 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181 				 PIPE_CONFIG(ADDR_SURF_P2) |
2182 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2185 				 PIPE_CONFIG(ADDR_SURF_P2) |
2186 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2187 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189 				 PIPE_CONFIG(ADDR_SURF_P2) |
2190 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2191 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2192 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2193 				 PIPE_CONFIG(ADDR_SURF_P2) |
2194 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2197 				 PIPE_CONFIG(ADDR_SURF_P2) |
2198 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2199 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2201 				 PIPE_CONFIG(ADDR_SURF_P2) |
2202 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2205 				 PIPE_CONFIG(ADDR_SURF_P2) |
2206 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2209 				 PIPE_CONFIG(ADDR_SURF_P2) |
2210 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2211 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2213 				 PIPE_CONFIG(ADDR_SURF_P2) |
2214 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2215 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2217 				 PIPE_CONFIG(ADDR_SURF_P2) |
2218 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2221 				 PIPE_CONFIG(ADDR_SURF_P2) |
2222 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2223 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2225 				 PIPE_CONFIG(ADDR_SURF_P2) |
2226 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2227 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2228 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229 				 PIPE_CONFIG(ADDR_SURF_P2) |
2230 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2231 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2232 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233 				 PIPE_CONFIG(ADDR_SURF_P2) |
2234 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2235 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2236 
2237 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2238 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2239 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240 				NUM_BANKS(ADDR_SURF_8_BANK));
2241 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2242 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2243 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244 				NUM_BANKS(ADDR_SURF_8_BANK));
2245 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2246 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2247 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248 				NUM_BANKS(ADDR_SURF_8_BANK));
2249 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252 				NUM_BANKS(ADDR_SURF_8_BANK));
2253 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2254 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2255 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256 				NUM_BANKS(ADDR_SURF_8_BANK));
2257 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2259 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260 				NUM_BANKS(ADDR_SURF_8_BANK));
2261 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264 				NUM_BANKS(ADDR_SURF_8_BANK));
2265 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2266 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2267 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268 				NUM_BANKS(ADDR_SURF_16_BANK));
2269 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2270 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2271 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272 				NUM_BANKS(ADDR_SURF_16_BANK));
2273 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2274 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2275 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276 				 NUM_BANKS(ADDR_SURF_16_BANK));
2277 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2278 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2279 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2280 				 NUM_BANKS(ADDR_SURF_16_BANK));
2281 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2282 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2283 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284 				 NUM_BANKS(ADDR_SURF_16_BANK));
2285 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2287 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288 				 NUM_BANKS(ADDR_SURF_16_BANK));
2289 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2290 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2291 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2292 				 NUM_BANKS(ADDR_SURF_8_BANK));
2293 
2294 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2295 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2296 			    reg_offset != 23)
2297 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2298 
2299 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2300 			if (reg_offset != 7)
2301 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2302 
2303 		break;
2304 	case CHIP_FIJI:
2305 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2308 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2309 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2312 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2313 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2316 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2317 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2320 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2321 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2324 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2325 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2326 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2328 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2329 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2332 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2333 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2334 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2335 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2336 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2337 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2338 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2339 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2340 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2342 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2343 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2346 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2347 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2350 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2351 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2353 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2354 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2355 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2356 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2358 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2362 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2364 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2366 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2370 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2372 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2373 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2374 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2375 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2376 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2378 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2379 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2380 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2382 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2383 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2384 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2386 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2387 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2388 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2390 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2391 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2392 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2394 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2395 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2396 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2397 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2398 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2399 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2400 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2402 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2403 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2404 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2406 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2407 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2408 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2410 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2411 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2412 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2414 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2418 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2419 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2420 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2422 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2423 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2425 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2426 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2427 
2428 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2430 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2431 				NUM_BANKS(ADDR_SURF_8_BANK));
2432 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2433 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2434 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2435 				NUM_BANKS(ADDR_SURF_8_BANK));
2436 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439 				NUM_BANKS(ADDR_SURF_8_BANK));
2440 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2442 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443 				NUM_BANKS(ADDR_SURF_8_BANK));
2444 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2446 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447 				NUM_BANKS(ADDR_SURF_8_BANK));
2448 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451 				NUM_BANKS(ADDR_SURF_8_BANK));
2452 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455 				NUM_BANKS(ADDR_SURF_8_BANK));
2456 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2458 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459 				NUM_BANKS(ADDR_SURF_8_BANK));
2460 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2463 				NUM_BANKS(ADDR_SURF_8_BANK));
2464 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467 				 NUM_BANKS(ADDR_SURF_8_BANK));
2468 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471 				 NUM_BANKS(ADDR_SURF_8_BANK));
2472 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2474 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475 				 NUM_BANKS(ADDR_SURF_8_BANK));
2476 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479 				 NUM_BANKS(ADDR_SURF_8_BANK));
2480 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 				 NUM_BANKS(ADDR_SURF_4_BANK));
2484 
2485 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2486 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2487 
2488 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2489 			if (reg_offset != 7)
2490 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2491 
2492 		break;
2493 	case CHIP_TONGA:
2494 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2496 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2497 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2498 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2500 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2501 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2502 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2504 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2505 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2506 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2507 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2509 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2510 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2513 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2514 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2517 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2518 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2519 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2521 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2522 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2523 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2524 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2525 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2526 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2527 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2528 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2529 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2531 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2532 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2535 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2536 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2537 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2539 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2540 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2542 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2543 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2544 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2547 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2551 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2553 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2555 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2557 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2559 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2560 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2561 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2563 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2564 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2565 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2567 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2568 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2569 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2571 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2572 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2573 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2575 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2576 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2577 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2579 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2580 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2581 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2583 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2584 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2585 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2586 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2587 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2588 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2589 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2591 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2592 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2593 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2595 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2596 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2597 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2599 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2600 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2601 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2603 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2607 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2608 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2609 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2611 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2612 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2613 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2614 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2615 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2616 
2617 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2619 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2620 				NUM_BANKS(ADDR_SURF_16_BANK));
2621 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2622 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2623 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2624 				NUM_BANKS(ADDR_SURF_16_BANK));
2625 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2627 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2628 				NUM_BANKS(ADDR_SURF_16_BANK));
2629 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2631 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2632 				NUM_BANKS(ADDR_SURF_16_BANK));
2633 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2635 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2636 				NUM_BANKS(ADDR_SURF_16_BANK));
2637 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2639 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2640 				NUM_BANKS(ADDR_SURF_16_BANK));
2641 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2643 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2644 				NUM_BANKS(ADDR_SURF_16_BANK));
2645 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2647 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2648 				NUM_BANKS(ADDR_SURF_16_BANK));
2649 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2651 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2652 				NUM_BANKS(ADDR_SURF_16_BANK));
2653 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2655 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2656 				 NUM_BANKS(ADDR_SURF_16_BANK));
2657 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2660 				 NUM_BANKS(ADDR_SURF_16_BANK));
2661 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2662 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2663 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2664 				 NUM_BANKS(ADDR_SURF_8_BANK));
2665 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2667 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2668 				 NUM_BANKS(ADDR_SURF_4_BANK));
2669 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2671 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2672 				 NUM_BANKS(ADDR_SURF_4_BANK));
2673 
2674 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2675 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2676 
2677 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2678 			if (reg_offset != 7)
2679 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2680 
2681 		break;
2682 	case CHIP_POLARIS11:
2683 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2685 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2686 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2687 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2688 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2690 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2691 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2693 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2694 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2695 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2698 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2699 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2702 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2703 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2706 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2707 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2708 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2710 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2711 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2714 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2715 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2716 				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2717 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2718 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2720 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2721 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2724 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2725 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2726 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2728 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2729 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2730 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2732 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2733 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2734 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2736 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2737 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2740 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2741 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2742 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2744 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2745 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2746 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2748 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2749 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2750 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2752 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2753 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2754 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2756 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2757 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2758 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2760 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2761 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2762 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2764 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2765 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2766 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2768 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2769 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2770 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2772 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2773 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2774 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2776 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2777 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2778 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2780 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2781 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2782 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2784 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2785 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2786 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2788 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2789 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2790 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2793 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2797 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2798 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2800 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2801 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2802 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2804 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2805 
2806 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2808 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2809 				NUM_BANKS(ADDR_SURF_16_BANK));
2810 
2811 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2812 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2813 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2814 				NUM_BANKS(ADDR_SURF_16_BANK));
2815 
2816 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819 				NUM_BANKS(ADDR_SURF_16_BANK));
2820 
2821 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824 				NUM_BANKS(ADDR_SURF_16_BANK));
2825 
2826 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2828 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2829 				NUM_BANKS(ADDR_SURF_16_BANK));
2830 
2831 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2833 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2834 				NUM_BANKS(ADDR_SURF_16_BANK));
2835 
2836 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839 				NUM_BANKS(ADDR_SURF_16_BANK));
2840 
2841 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2842 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2843 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844 				NUM_BANKS(ADDR_SURF_16_BANK));
2845 
2846 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2847 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2848 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2849 				NUM_BANKS(ADDR_SURF_16_BANK));
2850 
2851 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2853 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2854 				NUM_BANKS(ADDR_SURF_16_BANK));
2855 
2856 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2858 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859 				NUM_BANKS(ADDR_SURF_16_BANK));
2860 
2861 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864 				NUM_BANKS(ADDR_SURF_16_BANK));
2865 
2866 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2868 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2869 				NUM_BANKS(ADDR_SURF_8_BANK));
2870 
2871 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2873 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2874 				NUM_BANKS(ADDR_SURF_4_BANK));
2875 
2876 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2877 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2878 
2879 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2880 			if (reg_offset != 7)
2881 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2882 
2883 		break;
2884 	case CHIP_POLARIS10:
2885 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2887 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2888 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2889 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2892 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2893 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2896 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2897 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2900 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2901 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2904 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2905 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2906 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2908 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2912 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2913 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2916 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2918 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2919 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2920 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2922 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2924 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2926 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2928 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2930 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2931 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2932 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2933 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2935 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2936 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2940 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2944 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2946 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2949 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2950 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2951 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2953 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2954 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2955 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2956 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2959 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2960 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2962 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2963 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2964 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2966 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2967 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2968 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2970 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2971 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2972 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2974 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2975 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2976 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2977 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2978 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2980 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2982 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2984 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2986 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2987 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2988 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2990 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2991 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2992 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2993 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2994 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2995 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2996 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2997 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2998 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2999 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3000 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3002 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3003 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3004 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3005 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3006 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3007 
3008 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011 				NUM_BANKS(ADDR_SURF_16_BANK));
3012 
3013 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3014 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3015 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3016 				NUM_BANKS(ADDR_SURF_16_BANK));
3017 
3018 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3020 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3021 				NUM_BANKS(ADDR_SURF_16_BANK));
3022 
3023 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026 				NUM_BANKS(ADDR_SURF_16_BANK));
3027 
3028 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3031 				NUM_BANKS(ADDR_SURF_16_BANK));
3032 
3033 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3036 				NUM_BANKS(ADDR_SURF_16_BANK));
3037 
3038 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3040 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3041 				NUM_BANKS(ADDR_SURF_16_BANK));
3042 
3043 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3045 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3046 				NUM_BANKS(ADDR_SURF_16_BANK));
3047 
3048 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3050 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3051 				NUM_BANKS(ADDR_SURF_16_BANK));
3052 
3053 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3055 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3056 				NUM_BANKS(ADDR_SURF_16_BANK));
3057 
3058 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3061 				NUM_BANKS(ADDR_SURF_16_BANK));
3062 
3063 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3065 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3066 				NUM_BANKS(ADDR_SURF_8_BANK));
3067 
3068 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3071 				NUM_BANKS(ADDR_SURF_4_BANK));
3072 
3073 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3076 				NUM_BANKS(ADDR_SURF_4_BANK));
3077 
3078 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3079 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3080 
3081 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3082 			if (reg_offset != 7)
3083 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3084 
3085 		break;
3086 	case CHIP_STONEY:
3087 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3088 				PIPE_CONFIG(ADDR_SURF_P2) |
3089 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3090 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3091 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3092 				PIPE_CONFIG(ADDR_SURF_P2) |
3093 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3094 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3095 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3096 				PIPE_CONFIG(ADDR_SURF_P2) |
3097 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3098 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3099 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3100 				PIPE_CONFIG(ADDR_SURF_P2) |
3101 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3102 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3103 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104 				PIPE_CONFIG(ADDR_SURF_P2) |
3105 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3106 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3107 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3108 				PIPE_CONFIG(ADDR_SURF_P2) |
3109 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3110 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3111 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112 				PIPE_CONFIG(ADDR_SURF_P2) |
3113 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3114 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3115 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3116 				PIPE_CONFIG(ADDR_SURF_P2));
3117 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3118 				PIPE_CONFIG(ADDR_SURF_P2) |
3119 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3120 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3121 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 				 PIPE_CONFIG(ADDR_SURF_P2) |
3123 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3124 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3125 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3126 				 PIPE_CONFIG(ADDR_SURF_P2) |
3127 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3128 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3129 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3130 				 PIPE_CONFIG(ADDR_SURF_P2) |
3131 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3132 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3133 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3134 				 PIPE_CONFIG(ADDR_SURF_P2) |
3135 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3136 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3138 				 PIPE_CONFIG(ADDR_SURF_P2) |
3139 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3140 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3142 				 PIPE_CONFIG(ADDR_SURF_P2) |
3143 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3144 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3145 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3146 				 PIPE_CONFIG(ADDR_SURF_P2) |
3147 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3148 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3149 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3150 				 PIPE_CONFIG(ADDR_SURF_P2) |
3151 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3152 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3153 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3154 				 PIPE_CONFIG(ADDR_SURF_P2) |
3155 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3156 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3157 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3158 				 PIPE_CONFIG(ADDR_SURF_P2) |
3159 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3160 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3161 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3162 				 PIPE_CONFIG(ADDR_SURF_P2) |
3163 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3164 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3165 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3166 				 PIPE_CONFIG(ADDR_SURF_P2) |
3167 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3168 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3170 				 PIPE_CONFIG(ADDR_SURF_P2) |
3171 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3172 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3173 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3174 				 PIPE_CONFIG(ADDR_SURF_P2) |
3175 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3176 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3177 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3178 				 PIPE_CONFIG(ADDR_SURF_P2) |
3179 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3180 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3181 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182 				 PIPE_CONFIG(ADDR_SURF_P2) |
3183 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3184 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3185 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3186 				 PIPE_CONFIG(ADDR_SURF_P2) |
3187 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3188 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3189 
3190 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3191 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3192 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3193 				NUM_BANKS(ADDR_SURF_8_BANK));
3194 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3196 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3197 				NUM_BANKS(ADDR_SURF_8_BANK));
3198 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3199 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3200 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3201 				NUM_BANKS(ADDR_SURF_8_BANK));
3202 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3203 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3204 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3205 				NUM_BANKS(ADDR_SURF_8_BANK));
3206 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3207 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3208 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3209 				NUM_BANKS(ADDR_SURF_8_BANK));
3210 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3211 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3212 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3213 				NUM_BANKS(ADDR_SURF_8_BANK));
3214 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3215 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3216 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3217 				NUM_BANKS(ADDR_SURF_8_BANK));
3218 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3219 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3220 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3221 				NUM_BANKS(ADDR_SURF_16_BANK));
3222 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3223 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3224 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3225 				NUM_BANKS(ADDR_SURF_16_BANK));
3226 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3227 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3228 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3229 				 NUM_BANKS(ADDR_SURF_16_BANK));
3230 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3231 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3232 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3233 				 NUM_BANKS(ADDR_SURF_16_BANK));
3234 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3235 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3236 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3237 				 NUM_BANKS(ADDR_SURF_16_BANK));
3238 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3239 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3240 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 				 NUM_BANKS(ADDR_SURF_16_BANK));
3242 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3243 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3244 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3245 				 NUM_BANKS(ADDR_SURF_8_BANK));
3246 
3247 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3248 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3249 			    reg_offset != 23)
3250 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3251 
3252 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3253 			if (reg_offset != 7)
3254 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3255 
3256 		break;
3257 	default:
3258 		dev_warn(adev->dev,
3259 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3260 			 adev->asic_type);
3261 
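		/* fall through */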
3262 	case CHIP_CARRIZO:
3263 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3264 				PIPE_CONFIG(ADDR_SURF_P2) |
3265 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3266 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3267 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3268 				PIPE_CONFIG(ADDR_SURF_P2) |
3269 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3270 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3271 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3272 				PIPE_CONFIG(ADDR_SURF_P2) |
3273 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3274 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3275 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3276 				PIPE_CONFIG(ADDR_SURF_P2) |
3277 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3278 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3279 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280 				PIPE_CONFIG(ADDR_SURF_P2) |
3281 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3282 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3283 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3284 				PIPE_CONFIG(ADDR_SURF_P2) |
3285 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3286 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3288 				PIPE_CONFIG(ADDR_SURF_P2) |
3289 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3290 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3292 				PIPE_CONFIG(ADDR_SURF_P2));
3293 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3294 				PIPE_CONFIG(ADDR_SURF_P2) |
3295 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3296 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3297 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3298 				 PIPE_CONFIG(ADDR_SURF_P2) |
3299 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3300 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3301 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3302 				 PIPE_CONFIG(ADDR_SURF_P2) |
3303 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3304 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3305 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3306 				 PIPE_CONFIG(ADDR_SURF_P2) |
3307 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3308 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3309 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3310 				 PIPE_CONFIG(ADDR_SURF_P2) |
3311 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3312 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3313 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3314 				 PIPE_CONFIG(ADDR_SURF_P2) |
3315 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3316 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3317 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3318 				 PIPE_CONFIG(ADDR_SURF_P2) |
3319 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3320 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3321 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3322 				 PIPE_CONFIG(ADDR_SURF_P2) |
3323 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3324 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3325 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3326 				 PIPE_CONFIG(ADDR_SURF_P2) |
3327 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3328 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3329 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3330 				 PIPE_CONFIG(ADDR_SURF_P2) |
3331 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3332 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3333 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3334 				 PIPE_CONFIG(ADDR_SURF_P2) |
3335 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3336 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3337 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3338 				 PIPE_CONFIG(ADDR_SURF_P2) |
3339 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3340 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3342 				 PIPE_CONFIG(ADDR_SURF_P2) |
3343 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3344 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3346 				 PIPE_CONFIG(ADDR_SURF_P2) |
3347 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3350 				 PIPE_CONFIG(ADDR_SURF_P2) |
3351 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3354 				 PIPE_CONFIG(ADDR_SURF_P2) |
3355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3357 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3358 				 PIPE_CONFIG(ADDR_SURF_P2) |
3359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3361 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3362 				 PIPE_CONFIG(ADDR_SURF_P2) |
3363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3365 
3366 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3367 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3368 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3369 				NUM_BANKS(ADDR_SURF_8_BANK));
3370 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373 				NUM_BANKS(ADDR_SURF_8_BANK));
3374 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3375 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3376 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3377 				NUM_BANKS(ADDR_SURF_8_BANK));
3378 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3379 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3380 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3381 				NUM_BANKS(ADDR_SURF_8_BANK));
3382 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385 				NUM_BANKS(ADDR_SURF_8_BANK));
3386 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3388 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3389 				NUM_BANKS(ADDR_SURF_8_BANK));
3390 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3392 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3393 				NUM_BANKS(ADDR_SURF_8_BANK));
3394 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3395 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3396 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3397 				NUM_BANKS(ADDR_SURF_16_BANK));
3398 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3399 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3400 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3401 				NUM_BANKS(ADDR_SURF_16_BANK));
3402 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3403 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3404 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3405 				 NUM_BANKS(ADDR_SURF_16_BANK));
3406 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3407 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3408 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3409 				 NUM_BANKS(ADDR_SURF_16_BANK));
3410 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3411 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3412 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413 				 NUM_BANKS(ADDR_SURF_16_BANK));
3414 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3415 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3416 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417 				 NUM_BANKS(ADDR_SURF_16_BANK));
3418 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3419 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3420 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3421 				 NUM_BANKS(ADDR_SURF_8_BANK));
3422 
3423 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3424 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3425 			    reg_offset != 23)
3426 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3427 
3428 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3429 			if (reg_offset != 7)
3430 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3431 
3432 		break;
3433 	}
3434 }
3435 
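/*
 * gfx_v8_0_select_se_sh - steer register access to one SE/SH/instance
 *
 * Programs GRBM_GFX_INDEX so that subsequent register accesses target the
 * given shader engine, shader array and instance; passing 0xffffffff for a
 * field selects broadcast mode for it.  Callers serialize on grbm_idx_mutex
 * and restore full broadcast when they are done.
 */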
3436 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3437 				  u32 se_num, u32 sh_num, u32 instance)
3438 {
3439 	u32 data;
3440 
3441 	if (instance == 0xffffffff)
3442 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3443 	else
3444 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3445 
3446 	if (se_num == 0xffffffff)
3447 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3448 	else
3449 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3450 
3451 	if (sh_num == 0xffffffff)
3452 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3453 	else
3454 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3455 
3456 	WREG32(mmGRBM_GFX_INDEX, data);
3457 }
3458 
3459 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3460 {
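	/* use 1ULL so a bit_width of 32 does not shift past the width of the type */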
3461 	return (u32)((1ULL << bit_width) - 1);
3462 }
3463 
3464 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3465 {
3466 	u32 data, mask;
3467 
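	/*
	 * combine the fuse-disabled (CC_*) and user-disabled (GC_USER_*)
	 * render backend masks; the inverse, limited to the RBs one SH
	 * can own, is the active bitmap
	 */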
3468 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3469 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3470 
3471 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3472 
3473 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3474 				       adev->gfx.config.max_sh_per_se);
3475 
3476 	return (~data) & mask;
3477 }
3478 
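/*
 * Golden PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 values for a fully
 * enabled part; gfx_v8_0_write_harvested_raster_configs() remaps them
 * per SE when render backends have been harvested.
 */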
3479 static void
3480 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3481 {
3482 	switch (adev->asic_type) {
3483 	case CHIP_FIJI:
3484 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3485 			  RB_XSEL2(1) | PKR_MAP(2) |
3486 			  PKR_XSEL(1) | PKR_YSEL(1) |
3487 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3488 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3489 			   SE_PAIR_YSEL(2);
3490 		break;
3491 	case CHIP_TONGA:
3492 	case CHIP_POLARIS10:
3493 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3494 			  SE_XSEL(1) | SE_YSEL(1);
3495 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3496 			   SE_PAIR_YSEL(2);
3497 		break;
3498 	case CHIP_TOPAZ:
3499 	case CHIP_CARRIZO:
3500 		*rconf |= RB_MAP_PKR0(2);
3501 		*rconf1 |= 0x0;
3502 		break;
3503 	case CHIP_POLARIS11:
3504 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3505 			  SE_XSEL(1) | SE_YSEL(1);
3506 		*rconf1 |= 0x0;
3507 		break;
3508 	case CHIP_STONEY:
3509 		*rconf |= 0x0;
3510 		*rconf1 |= 0x0;
3511 		break;
3512 	default:
3513 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3514 		break;
3515 	}
3516 }
3517 
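/*
 * Adjust the raster configuration for harvested parts.  rb_mask carries one
 * bit per physical render backend; for each SE, whenever one half of an SE
 * pair, packer pair or RB pair is completely disabled, the corresponding
 * SE_PAIR_MAP/SE_MAP/PKR_MAP/RB_MAP field is repointed at the surviving
 * half before the per-SE config is written.
 */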
3518 static void
3519 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3520 					u32 raster_config, u32 raster_config_1,
3521 					unsigned rb_mask, unsigned num_rb)
3522 {
3523 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3524 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3525 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3526 	unsigned rb_per_se = num_rb / num_se;
3527 	unsigned se_mask[4];
3528 	unsigned se;
3529 
3530 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3531 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3532 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3533 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3534 
3535 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3536 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3537 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3538 
3539 	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3540 			     (!se_mask[2] && !se_mask[3]))) {
3541 		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3542 
3543 		if (!se_mask[0] && !se_mask[1]) {
3544 			raster_config_1 |=
3545 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3546 		} else {
3547 			raster_config_1 |=
3548 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3549 		}
3550 	}
3551 
3552 	for (se = 0; se < num_se; se++) {
3553 		unsigned raster_config_se = raster_config;
3554 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3555 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3556 		int idx = (se / 2) * 2;
3557 
3558 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3559 			raster_config_se &= ~SE_MAP_MASK;
3560 
3561 			if (!se_mask[idx]) {
3562 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3563 			} else {
3564 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3565 			}
3566 		}
3567 
3568 		pkr0_mask &= rb_mask;
3569 		pkr1_mask &= rb_mask;
3570 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3571 			raster_config_se &= ~PKR_MAP_MASK;
3572 
3573 			if (!pkr0_mask) {
3574 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3575 			} else {
3576 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3577 			}
3578 		}
3579 
3580 		if (rb_per_se >= 2) {
3581 			unsigned rb0_mask = 1 << (se * rb_per_se);
3582 			unsigned rb1_mask = rb0_mask << 1;
3583 
3584 			rb0_mask &= rb_mask;
3585 			rb1_mask &= rb_mask;
3586 			if (!rb0_mask || !rb1_mask) {
3587 				raster_config_se &= ~RB_MAP_PKR0_MASK;
3588 
3589 				if (!rb0_mask) {
3590 					raster_config_se |=
3591 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3592 				} else {
3593 					raster_config_se |=
3594 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3595 				}
3596 			}
3597 
3598 			if (rb_per_se > 2) {
3599 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3600 				rb1_mask = rb0_mask << 1;
3601 				rb0_mask &= rb_mask;
3602 				rb1_mask &= rb_mask;
3603 				if (!rb0_mask || !rb1_mask) {
3604 					raster_config_se &= ~RB_MAP_PKR1_MASK;
3605 
3606 					if (!rb0_mask) {
3607 						raster_config_se |=
3608 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3609 					} else {
3610 						raster_config_se |=
3611 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3612 					}
3613 				}
3614 			}
3615 		}
3616 
3617 		/* GRBM_GFX_INDEX has a different offset on VI */
3618 		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3619 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3620 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3621 	}
3622 
3623 	/* GRBM_GFX_INDEX has a different offset on VI */
3624 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3625 }
3626 
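/*
 * Probe every SE/SH to build the active render backend bitmap, program the
 * (possibly harvested) raster configuration, and cache the per-SE/SH
 * register values so they can be reported to userspace.
 */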
3627 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3628 {
3629 	int i, j;
3630 	u32 data;
3631 	u32 raster_config = 0, raster_config_1 = 0;
3632 	u32 active_rbs = 0;
3633 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3634 					adev->gfx.config.max_sh_per_se;
3635 	unsigned num_rb_pipes;
3636 
3637 	mutex_lock(&adev->grbm_idx_mutex);
3638 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3639 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3640 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3641 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3642 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3643 					       rb_bitmap_width_per_sh);
3644 		}
3645 	}
3646 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3647 
3648 	adev->gfx.config.backend_enable_mask = active_rbs;
3649 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3650 
3651 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3652 			     adev->gfx.config.max_shader_engines, 16);
3653 
3654 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3655 
3656 	if (!adev->gfx.config.backend_enable_mask ||
3657 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3658 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3659 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3660 	} else {
3661 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3662 							adev->gfx.config.backend_enable_mask,
3663 							num_rb_pipes);
3664 	}
3665 
3666 	/* cache the values for userspace */
3667 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3668 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3669 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3670 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3671 				RREG32(mmCC_RB_BACKEND_DISABLE);
3672 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3673 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3674 			adev->gfx.config.rb_config[i][j].raster_config =
3675 				RREG32(mmPA_SC_RASTER_CONFIG);
3676 			adev->gfx.config.rb_config[i][j].raster_config_1 =
3677 				RREG32(mmPA_SC_RASTER_CONFIG_1);
3678 		}
3679 	}
3680 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3681 	mutex_unlock(&adev->grbm_idx_mutex);
3682 }
3683 
3684 /**
3685  * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
3686  *
3687  * @adev: amdgpu_device pointer
3688  *
3689  * Initialize the SH_MEM registers for the compute VMIDs
3690  *
3691  */
3692 #define DEFAULT_SH_MEM_BASES	(0x6000)
3693 #define FIRST_COMPUTE_VMID	(8)
3694 #define LAST_COMPUTE_VMID	(16)
3695 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3696 {
3697 	int i;
3698 	uint32_t sh_mem_config;
3699 	uint32_t sh_mem_bases;
3700 
3701 	/*
3702 	 * Configure apertures:
3703 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3704 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3705 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3706 	 */
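	/*
	 * the 16-bit base fields of SH_MEM_BASES supply address bits
	 * [63:48], so 0x6000 in both halves yields the
	 * 0x60000000'00000000 apertures listed above
	 */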
3707 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3708 
3709 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3710 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3711 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3712 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3713 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3714 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3715 
3716 	mutex_lock(&adev->srbm_mutex);
3717 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3718 		vi_srbm_select(adev, 0, 0, 0, i);
3719 		/* CP and shaders */
3720 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3721 		WREG32(mmSH_MEM_APE1_BASE, 1);
3722 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3723 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3724 	}
3725 	vi_srbm_select(adev, 0, 0, 0, 0);
3726 	mutex_unlock(&adev->srbm_mutex);
3727 }
3728 
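/*
 * One-time gfx engine setup: program the address config and tiling tables,
 * set up the raster backends, initialize the per-VMID SH_MEM apertures and
 * the PA_SC fifo sizes.
 */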
3729 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3730 {
3731 	u32 tmp;
3732 	int i;
3733 
3734 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3735 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3736 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3737 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3738 
3739 	gfx_v8_0_tiling_mode_table_init(adev);
3740 	gfx_v8_0_setup_rb(adev);
3741 	gfx_v8_0_get_cu_info(adev);
3742 
3743 	/* XXX SH_MEM regs */
3744 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3745 	mutex_lock(&adev->srbm_mutex);
3746 	for (i = 0; i < 16; i++) {
3747 		vi_srbm_select(adev, 0, 0, 0, i);
3748 		/* CP and shaders */
3749 		if (i == 0) {
3750 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3751 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3752 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3753 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3754 			WREG32(mmSH_MEM_CONFIG, tmp);
3755 		} else {
3756 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3757 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3758 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3759 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3760 			WREG32(mmSH_MEM_CONFIG, tmp);
3761 		}
3762 
3763 		WREG32(mmSH_MEM_APE1_BASE, 1);
3764 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3765 		WREG32(mmSH_MEM_BASES, 0);
3766 	}
3767 	vi_srbm_select(adev, 0, 0, 0, 0);
3768 	mutex_unlock(&adev->srbm_mutex);
3769 
3770 	gfx_v8_0_init_compute_vmid(adev);
3771 
3772 	mutex_lock(&adev->grbm_idx_mutex);
3773 	/*
3774 	 * make sure the following register writes are broadcast
3775 	 * to all the shaders
3776 	 */
3777 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3778 
3779 	WREG32(mmPA_SC_FIFO_SIZE,
3780 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3781 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3782 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3783 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3784 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3785 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3786 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3787 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3788 	mutex_unlock(&adev->grbm_idx_mutex);
3790 }
3791 
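/*
 * Poll RLC_SERDES_CU_MASTER_BUSY for every SE/SH, then the SE/GC/TC
 * non-CU masters, until all serdes masters report idle or
 * adev->usec_timeout expires for a given register.
 */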
3792 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3793 {
3794 	u32 i, j, k;
3795 	u32 mask;
3796 
3797 	mutex_lock(&adev->grbm_idx_mutex);
3798 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3799 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3800 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3801 			for (k = 0; k < adev->usec_timeout; k++) {
3802 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3803 					break;
3804 				udelay(1);
3805 			}
3806 		}
3807 	}
3808 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3809 	mutex_unlock(&adev->grbm_idx_mutex);
3810 
3811 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3812 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3813 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3814 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3815 	for (k = 0; k < adev->usec_timeout; k++) {
3816 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3817 			break;
3818 		udelay(1);
3819 	}
3820 }
3821 
3822 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3823 					       bool enable)
3824 {
3825 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3826 
3827 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3828 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3829 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3830 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3831 
3832 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3833 }
3834 
3835 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3836 {
3837 	/* csib */
3838 	WREG32(mmRLC_CSIB_ADDR_HI,
3839 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3840 	WREG32(mmRLC_CSIB_ADDR_LO,
3841 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3842 	WREG32(mmRLC_CSIB_LENGTH,
3843 			adev->gfx.rlc.clear_state_size);
3844 }
3845 
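/*
 * Walk the RLC indirect register list.  Each entry is a run of three-dword
 * groups terminated by an 0xFFFFFFFF marker, and the third dword of every
 * group holds a register index: indices are collected into unique_indices[]
 * and rewritten in place as their position in that table, while the start
 * offset of each entry is recorded in ind_start_offsets[].
 */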
3846 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3847 				int ind_offset,
3848 				int list_size,
3849 				int *unique_indices,
3850 				int *indices_count,
3851 				int max_indices,
3852 				int *ind_start_offsets,
3853 				int *offset_count,
3854 				int max_offset)
3855 {
3856 	int indices;
3857 	bool new_entry = true;
3858 
3859 	for (; ind_offset < list_size; ind_offset++) {
3860 
3861 		if (new_entry) {
3862 			new_entry = false;
3863 			ind_start_offsets[*offset_count] = ind_offset;
3864 			*offset_count = *offset_count + 1;
3865 			BUG_ON(*offset_count >= max_offset);
3866 		}
3867 
3868 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3869 			new_entry = true;
3870 			continue;
3871 		}
3872 
3873 		ind_offset += 2;
3874 
3875 		/* look for a matching index */
3876 		for (indices = 0; indices < *indices_count; indices++) {
3879 			if (unique_indices[indices] ==
3880 				register_list_format[ind_offset])
3881 				break;
3882 		}
3883 
3884 		if (indices >= *indices_count) {
3885 			unique_indices[*indices_count] =
3886 				register_list_format[ind_offset];
3887 			indices = *indices_count;
3888 			*indices_count = *indices_count + 1;
3889 			BUG_ON(*indices_count >= max_indices);
3890 		}
3891 
3892 		register_list_format[ind_offset] = indices;
3893 	}
3894 }
3895 
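/*
 * Program the RLC save/restore machine: stream the direct register list
 * into SRM ARAM, load the re-indexed indirect list, its entry count and
 * the per-entry start offsets into GPM scratch, and drop the unique
 * register indices into the SRM_INDEX_CNTL address/data register pairs.
 */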
3896 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3897 {
3898 	int i, temp, data;
3899 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3900 	int indices_count = 0;
3901 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3902 	int offset_count = 0;
3903 
3904 	int list_size;
3905 	unsigned int *register_list_format =
3906 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3907 	if (!register_list_format)
3908 		return -ENOMEM;
3909 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3910 			adev->gfx.rlc.reg_list_format_size_bytes);
3911 
3912 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3913 				RLC_FormatDirectRegListLength,
3914 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3915 				unique_indices,
3916 				&indices_count,
3917 				ARRAY_SIZE(unique_indices),
3918 				indirect_start_offsets,
3919 				&offset_count,
3920 				ARRAY_SIZE(indirect_start_offsets));
3921 
3922 	/* save and restore list */
3923 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3924 
3925 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3926 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3927 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3928 
3929 	/* indirect list */
3930 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3931 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3932 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3933 
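	/* the restore list holds register/value pairs, so its entry count is half its dword count */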
3934 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3935 	list_size = list_size >> 1;
3936 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3937 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3938 
3939 	/* starting offsets */
3940 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3941 		adev->gfx.rlc.starting_offsets_start);
3942 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3943 		WREG32(mmRLC_GPM_SCRATCH_DATA,
3944 				indirect_start_offsets[i]);
3945 
3946 	/* unique indices */
3947 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3948 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3949 	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3950 		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3951 		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3952 	}
3953 	kfree(register_list_format);
3954 
3955 	return 0;
3956 }
3957 
3958 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3959 {
3960 	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3961 }
3962 
3963 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3964 {
3965 	uint32_t data;
3966 
3967 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3968 			      AMD_PG_SUPPORT_GFX_SMG |
3969 			      AMD_PG_SUPPORT_GFX_DMG)) {
3970 		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3971 
3972 		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3973 		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3974 		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3975 		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3976 		WREG32(mmRLC_PG_DELAY, data);
3977 
3978 		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3979 		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3980 	}
3981 }
3982 
3983 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3984 						bool enable)
3985 {
3986 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3987 }
3988 
3989 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3990 						  bool enable)
3991 {
3992 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3993 }
3994 
3995 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3996 {
3997 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
3998 }
3999 
4000 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4001 {
4002 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4003 			      AMD_PG_SUPPORT_GFX_SMG |
4004 			      AMD_PG_SUPPORT_GFX_DMG |
4005 			      AMD_PG_SUPPORT_CP |
4006 			      AMD_PG_SUPPORT_GDS |
4007 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
4008 		gfx_v8_0_init_csb(adev);
4009 		gfx_v8_0_init_save_restore_list(adev);
4010 		gfx_v8_0_enable_save_restore_machine(adev);
4011 
4012 		if ((adev->asic_type == CHIP_CARRIZO) ||
4013 		    (adev->asic_type == CHIP_STONEY)) {
4014 			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4015 			gfx_v8_0_init_power_gating(adev);
4016 			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4017 			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4018 				cz_enable_sck_slow_down_on_power_up(adev, true);
4019 				cz_enable_sck_slow_down_on_power_down(adev, true);
4020 			} else {
4021 				cz_enable_sck_slow_down_on_power_up(adev, false);
4022 				cz_enable_sck_slow_down_on_power_down(adev, false);
4023 			}
4024 			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4025 				cz_enable_cp_power_gating(adev, true);
4026 			else
4027 				cz_enable_cp_power_gating(adev, false);
4028 		} else if (adev->asic_type == CHIP_POLARIS11) {
4029 			gfx_v8_0_init_power_gating(adev);
4030 		}
4031 	}
4032 }
4033 
4034 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4035 {
4036 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4037 
4038 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4039 	gfx_v8_0_wait_for_rlc_serdes(adev);
4040 }
4041 
4042 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4043 {
4044 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4045 	udelay(50);
4046 
4047 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4048 	udelay(50);
4049 }
4050 
4051 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4052 {
4053 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4054 
4055 	/* carrizo enables the cp interrupt after the cp is initialized */
4056 	if (!(adev->flags & AMD_IS_APU))
4057 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4058 
4059 	udelay(50);
4060 }
4061 
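/*
 * Legacy (non-SMU) RLC microcode load: stream the RLC_G image one dword at
 * a time through RLC_GPM_UCODE_ADDR/DATA, then write the firmware version
 * to the address register.
 */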
4062 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4063 {
4064 	const struct rlc_firmware_header_v2_0 *hdr;
4065 	const __le32 *fw_data;
4066 	unsigned i, fw_size;
4067 
4068 	if (!adev->gfx.rlc_fw)
4069 		return -EINVAL;
4070 
4071 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4072 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4073 
4074 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4075 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4076 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4077 
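	/* stream the RLC ucode into the GPM ucode RAM: reset the write
	 * address to 0, write the image one dword at a time, then record
	 * the firmware version in the ADDR register */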
4078 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4079 	for (i = 0; i < fw_size; i++)
4080 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4081 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4082 
4083 	return 0;
4084 }
4085 
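/* Bring the RLC up from a clean state: halt it, mask clock and power
 * gating, soft-reset it, re-initialize power gating, (re)load the RLC
 * microcode unless the SMU already loaded it, then start it running.
 */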
4086 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4087 {
4088 	int r;
4089 	u32 tmp;
4090 
4091 	gfx_v8_0_rlc_stop(adev);
4092 
4093 	/* disable CG */
4094 	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4095 	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4096 		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4097 	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4098 	if (adev->asic_type == CHIP_POLARIS11 ||
4099 	    adev->asic_type == CHIP_POLARIS10) {
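		/* also clear the 3D-pipe CGCG/CGLS enables (low two bits) */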
4100 		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4101 		tmp &= ~0x3;
4102 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4103 	}
4104 
4105 	/* disable PG */
4106 	WREG32(mmRLC_PG_CNTL, 0);
4107 
4108 	gfx_v8_0_rlc_reset(adev);
4109 	gfx_v8_0_init_pg(adev);
4110 
4111 	if (!adev->pp_enabled) {
4112 		if (!adev->firmware.smu_load) {
4113 			/* legacy rlc firmware loading */
4114 			r = gfx_v8_0_rlc_load_microcode(adev);
4115 			if (r)
4116 				return r;
4117 		} else {
4118 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4119 							AMDGPU_UCODE_ID_RLC_G);
4120 			if (r)
4121 				return -EINVAL;
4122 		}
4123 	}
4124 
4125 	gfx_v8_0_rlc_start(adev);
4126 
4127 	return 0;
4128 }
4129 
4130 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4131 {
4132 	int i;
4133 	u32 tmp = RREG32(mmCP_ME_CNTL);
4134 
4135 	if (enable) {
4136 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4137 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4138 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4139 	} else {
4140 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4141 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4142 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4143 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4144 			adev->gfx.gfx_ring[i].ready = false;
4145 	}
4146 	WREG32(mmCP_ME_CNTL, tmp);
4147 	udelay(50);
4148 }
4149 
4150 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4151 {
4152 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4153 	const struct gfx_firmware_header_v1_0 *ce_hdr;
4154 	const struct gfx_firmware_header_v1_0 *me_hdr;
4155 	const __le32 *fw_data;
4156 	unsigned i, fw_size;
4157 
4158 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4159 		return -EINVAL;
4160 
4161 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4162 		adev->gfx.pfp_fw->data;
4163 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4164 		adev->gfx.ce_fw->data;
4165 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4166 		adev->gfx.me_fw->data;
4167 
4168 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4169 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4170 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4171 
4172 	gfx_v8_0_cp_gfx_enable(adev, false);
4173 
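	/* with the CP halted, each front end (PFP, CE, ME) is loaded the
	 * same way: zero the ucode write address, stream the image one
	 * dword at a time, then record the firmware version in the
	 * address register */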
4174 	/* PFP */
4175 	fw_data = (const __le32 *)
4176 		(adev->gfx.pfp_fw->data +
4177 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4178 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4179 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4180 	for (i = 0; i < fw_size; i++)
4181 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4182 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4183 
4184 	/* CE */
4185 	fw_data = (const __le32 *)
4186 		(adev->gfx.ce_fw->data +
4187 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4188 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4189 	WREG32(mmCP_CE_UCODE_ADDR, 0);
4190 	for (i = 0; i < fw_size; i++)
4191 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4192 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4193 
4194 	/* ME */
4195 	fw_data = (const __le32 *)
4196 		(adev->gfx.me_fw->data +
4197 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4198 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4199 	WREG32(mmCP_ME_RAM_WADDR, 0);
4200 	for (i = 0; i < fw_size; i++)
4201 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4202 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4203 
4204 	return 0;
4205 }
4206 
4207 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4208 {
4209 	u32 count = 0;
4210 	const struct cs_section_def *sect = NULL;
4211 	const struct cs_extent_def *ext = NULL;
4212 
4213 	/* begin clear state */
4214 	count += 2;
4215 	/* context control state */
4216 	count += 3;
4217 
4218 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4219 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4220 			if (sect->id == SECT_CONTEXT)
4221 				count += 2 + ext->reg_count;
4222 			else
4223 				return 0;
4224 		}
4225 	}
4226 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4227 	count += 4;
4228 	/* end clear state */
4229 	count += 2;
4230 	/* clear state */
4231 	count += 2;
4232 
4233 	return count;
4234 }
4235 
4236 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4237 {
4238 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4239 	const struct cs_section_def *sect = NULL;
4240 	const struct cs_extent_def *ext = NULL;
4241 	int r, i;
4242 
4243 	/* init the CP */
4244 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4245 	WREG32(mmCP_ENDIAN_SWAP, 0);
4246 	WREG32(mmCP_DEVICE_ID, 1);
4247 
4248 	gfx_v8_0_cp_gfx_enable(adev, true);
4249 
4250 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4251 	if (r) {
4252 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4253 		return r;
4254 	}
4255 
4256 	/* clear state buffer */
4257 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4258 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4259 
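	/* CONTEXT_CONTROL: bit 31 set in both dwords marks the load and
	 * shadow enable masks as valid */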
4260 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4261 	amdgpu_ring_write(ring, 0x80000000);
4262 	amdgpu_ring_write(ring, 0x80000000);
4263 
4264 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4265 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4266 			if (sect->id == SECT_CONTEXT) {
4267 				amdgpu_ring_write(ring,
4268 				       PACKET3(PACKET3_SET_CONTEXT_REG,
4269 					       ext->reg_count));
4270 				amdgpu_ring_write(ring,
4271 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4272 				for (i = 0; i < ext->reg_count; i++)
4273 					amdgpu_ring_write(ring, ext->extent[i]);
4274 			}
4275 		}
4276 	}
4277 
4278 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4279 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4280 	switch (adev->asic_type) {
4281 	case CHIP_TONGA:
4282 	case CHIP_POLARIS10:
4283 		amdgpu_ring_write(ring, 0x16000012);
4284 		amdgpu_ring_write(ring, 0x0000002A);
4285 		break;
4286 	case CHIP_POLARIS11:
4287 		amdgpu_ring_write(ring, 0x16000012);
4288 		amdgpu_ring_write(ring, 0x00000000);
4289 		break;
4290 	case CHIP_FIJI:
4291 		amdgpu_ring_write(ring, 0x3a00161a);
4292 		amdgpu_ring_write(ring, 0x0000002e);
4293 		break;
4294 	case CHIP_CARRIZO:
4295 		amdgpu_ring_write(ring, 0x00000002);
4296 		amdgpu_ring_write(ring, 0x00000000);
4297 		break;
4298 	case CHIP_TOPAZ:
4299 		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4300 				0x00000000 : 0x00000002);
4301 		amdgpu_ring_write(ring, 0x00000000);
4302 		break;
4303 	case CHIP_STONEY:
4304 		amdgpu_ring_write(ring, 0x00000000);
4305 		amdgpu_ring_write(ring, 0x00000000);
4306 		break;
4307 	default:
4308 		BUG();
4309 	}
4310 
4311 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4312 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4313 
4314 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4315 	amdgpu_ring_write(ring, 0);
4316 
4317 	/* init the CE partitions */
4318 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4319 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4320 	amdgpu_ring_write(ring, 0x8000);
4321 	amdgpu_ring_write(ring, 0x8000);
4322 
4323 	amdgpu_ring_commit(ring);
4324 
4325 	return 0;
4326 }
4327 
4328 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4329 {
4330 	struct amdgpu_ring *ring;
4331 	u32 tmp;
4332 	u32 rb_bufsz;
4333 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4334 	int r;
4335 
4336 	/* Set the write pointer delay */
4337 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4338 
4339 	/* set the RB to use vmid 0 */
4340 	WREG32(mmCP_RB_VMID, 0);
4341 
4342 	/* Set ring buffer size */
4343 	ring = &adev->gfx.gfx_ring[0];
4344 	rb_bufsz = order_base_2(ring->ring_size / 8);
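	/* RB_BUFSZ and RB_BLKSZ take log2 values; the ring size is
	 * expressed in 8-byte units here */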
4345 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4346 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4347 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4348 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4349 #ifdef __BIG_ENDIAN
4350 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4351 #endif
4352 	WREG32(mmCP_RB0_CNTL, tmp);
4353 
4354 	/* Initialize the ring buffer's read and write pointers */
4355 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4356 	ring->wptr = 0;
4357 	WREG32(mmCP_RB0_WPTR, ring->wptr);
4358 
4359 	/* set the writeback address whether it's enabled or not */
4360 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4361 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4362 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4363 
4364 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4365 	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4366 	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4367 	mdelay(1);
4368 	WREG32(mmCP_RB0_CNTL, tmp);
4369 
4370 	rb_addr = ring->gpu_addr >> 8;
4371 	WREG32(mmCP_RB0_BASE, rb_addr);
4372 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4373 
4374 	/* no gfx doorbells on iceland */
4375 	if (adev->asic_type != CHIP_TOPAZ) {
4376 		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4377 		if (ring->use_doorbell) {
4378 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4379 					    DOORBELL_OFFSET, ring->doorbell_index);
4380 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4381 					    DOORBELL_HIT, 0);
4382 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4383 					    DOORBELL_EN, 1);
4384 		} else {
4385 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4386 					    DOORBELL_EN, 0);
4387 		}
4388 		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4389 
4390 		if (adev->asic_type == CHIP_TONGA) {
4391 			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4392 					    DOORBELL_RANGE_LOWER,
4393 					    AMDGPU_DOORBELL_GFX_RING0);
4394 			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4395 
4396 			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4397 			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4398 		}
4399 
4400 	}
4401 
4402 	/* start the ring */
4403 	gfx_v8_0_cp_gfx_start(adev);
4404 	ring->ready = true;
4405 	r = amdgpu_ring_test_ring(ring);
4406 	if (r)
4407 		ring->ready = false;
4408 
4409 	return r;
4410 }
4411 
4412 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4413 {
4414 	int i;
4415 
4416 	if (enable) {
4417 		WREG32(mmCP_MEC_CNTL, 0);
4418 	} else {
4419 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4420 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4421 			adev->gfx.compute_ring[i].ready = false;
4422 	}
4423 	udelay(50);
4424 }
4425 
4426 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4427 {
4428 	const struct gfx_firmware_header_v1_0 *mec_hdr;
4429 	const __le32 *fw_data;
4430 	unsigned i, fw_size;
4431 
4432 	if (!adev->gfx.mec_fw)
4433 		return -EINVAL;
4434 
4435 	gfx_v8_0_cp_compute_enable(adev, false);
4436 
4437 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4438 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4439 
4440 	fw_data = (const __le32 *)
4441 		(adev->gfx.mec_fw->data +
4442 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4443 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4444 
4445 	/* MEC1 */
4446 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4447 	for (i = 0; i < fw_size; i++)
4448 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4449 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4450 
4451 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4452 	if (adev->gfx.mec2_fw) {
4453 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4454 
4455 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4456 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4457 
4458 		fw_data = (const __le32 *)
4459 			(adev->gfx.mec2_fw->data +
4460 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4461 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4462 
4463 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4464 		for (i = 0; i < fw_size; i++)
4465 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4466 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4467 	}
4468 
4469 	return 0;
4470 }
4471 
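/* Memory Queue Descriptor for VI compute queues. The first 256 dwords
 * hold the CP-visible queue state (dispatch setup, user data, HQD
 * register image, IQ-timer and SET_RESOURCES packet storage); the
 * trailing 256 dwords are scratch space owned by the CP ucode.
 */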
4472 struct vi_mqd {
4473 	uint32_t header;  /* ordinal0 */
4474 	uint32_t compute_dispatch_initiator;  /* ordinal1 */
4475 	uint32_t compute_dim_x;  /* ordinal2 */
4476 	uint32_t compute_dim_y;  /* ordinal3 */
4477 	uint32_t compute_dim_z;  /* ordinal4 */
4478 	uint32_t compute_start_x;  /* ordinal5 */
4479 	uint32_t compute_start_y;  /* ordinal6 */
4480 	uint32_t compute_start_z;  /* ordinal7 */
4481 	uint32_t compute_num_thread_x;  /* ordinal8 */
4482 	uint32_t compute_num_thread_y;  /* ordinal9 */
4483 	uint32_t compute_num_thread_z;  /* ordinal10 */
4484 	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4485 	uint32_t compute_perfcount_enable;  /* ordinal12 */
4486 	uint32_t compute_pgm_lo;  /* ordinal13 */
4487 	uint32_t compute_pgm_hi;  /* ordinal14 */
4488 	uint32_t compute_tba_lo;  /* ordinal15 */
4489 	uint32_t compute_tba_hi;  /* ordinal16 */
4490 	uint32_t compute_tma_lo;  /* ordinal17 */
4491 	uint32_t compute_tma_hi;  /* ordinal18 */
4492 	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4493 	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4494 	uint32_t compute_vmid;  /* ordinal21 */
4495 	uint32_t compute_resource_limits;  /* ordinal22 */
4496 	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4497 	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4498 	uint32_t compute_tmpring_size;  /* ordinal25 */
4499 	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4500 	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4501 	uint32_t compute_restart_x;  /* ordinal28 */
4502 	uint32_t compute_restart_y;  /* ordinal29 */
4503 	uint32_t compute_restart_z;  /* ordinal30 */
4504 	uint32_t compute_thread_trace_enable;  /* ordinal31 */
4505 	uint32_t compute_misc_reserved;  /* ordinal32 */
4506 	uint32_t compute_dispatch_id;  /* ordinal33 */
4507 	uint32_t compute_threadgroup_id;  /* ordinal34 */
4508 	uint32_t compute_relaunch;  /* ordinal35 */
4509 	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4510 	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4511 	uint32_t compute_wave_restore_control;  /* ordinal38 */
4512 	uint32_t reserved9;  /* ordinal39 */
4513 	uint32_t reserved10;  /* ordinal40 */
4514 	uint32_t reserved11;  /* ordinal41 */
4515 	uint32_t reserved12;  /* ordinal42 */
4516 	uint32_t reserved13;  /* ordinal43 */
4517 	uint32_t reserved14;  /* ordinal44 */
4518 	uint32_t reserved15;  /* ordinal45 */
4519 	uint32_t reserved16;  /* ordinal46 */
4520 	uint32_t reserved17;  /* ordinal47 */
4521 	uint32_t reserved18;  /* ordinal48 */
4522 	uint32_t reserved19;  /* ordinal49 */
4523 	uint32_t reserved20;  /* ordinal50 */
4524 	uint32_t reserved21;  /* ordinal51 */
4525 	uint32_t reserved22;  /* ordinal52 */
4526 	uint32_t reserved23;  /* ordinal53 */
4527 	uint32_t reserved24;  /* ordinal54 */
4528 	uint32_t reserved25;  /* ordinal55 */
4529 	uint32_t reserved26;  /* ordinal56 */
4530 	uint32_t reserved27;  /* ordinal57 */
4531 	uint32_t reserved28;  /* ordinal58 */
4532 	uint32_t reserved29;  /* ordinal59 */
4533 	uint32_t reserved30;  /* ordinal60 */
4534 	uint32_t reserved31;  /* ordinal61 */
4535 	uint32_t reserved32;  /* ordinal62 */
4536 	uint32_t reserved33;  /* ordinal63 */
4537 	uint32_t reserved34;  /* ordinal64 */
4538 	uint32_t compute_user_data_0;  /* ordinal65 */
4539 	uint32_t compute_user_data_1;  /* ordinal66 */
4540 	uint32_t compute_user_data_2;  /* ordinal67 */
4541 	uint32_t compute_user_data_3;  /* ordinal68 */
4542 	uint32_t compute_user_data_4;  /* ordinal69 */
4543 	uint32_t compute_user_data_5;  /* ordinal70 */
4544 	uint32_t compute_user_data_6;  /* ordinal71 */
4545 	uint32_t compute_user_data_7;  /* ordinal72 */
4546 	uint32_t compute_user_data_8;  /* ordinal73 */
4547 	uint32_t compute_user_data_9;  /* ordinal74 */
4548 	uint32_t compute_user_data_10;  /* ordinal75 */
4549 	uint32_t compute_user_data_11;  /* ordinal76 */
4550 	uint32_t compute_user_data_12;  /* ordinal77 */
4551 	uint32_t compute_user_data_13;  /* ordinal78 */
4552 	uint32_t compute_user_data_14;  /* ordinal79 */
4553 	uint32_t compute_user_data_15;  /* ordinal80 */
4554 	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4555 	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4556 	uint32_t reserved35;  /* ordinal83 */
4557 	uint32_t reserved36;  /* ordinal84 */
4558 	uint32_t reserved37;  /* ordinal85 */
4559 	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4560 	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4561 	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4562 	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4563 	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4564 	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4565 	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4566 	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4567 	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4568 	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4569 	uint32_t reserved38;  /* ordinal96 */
4570 	uint32_t reserved39;  /* ordinal97 */
4571 	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4572 	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4573 	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4574 	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4575 	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4576 	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4577 	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4578 	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4579 	uint32_t reserved40;  /* ordinal106 */
4580 	uint32_t reserved41;  /* ordinal107 */
4581 	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4582 	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4583 	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4584 	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4585 	uint32_t reserved42;  /* ordinal112 */
4586 	uint32_t reserved43;  /* ordinal113 */
4587 	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4588 	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4589 	uint32_t cp_packet_id_lo;  /* ordinal116 */
4590 	uint32_t cp_packet_id_hi;  /* ordinal117 */
4591 	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4592 	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4593 	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4594 	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4595 	uint32_t gds_save_mask_lo;  /* ordinal122 */
4596 	uint32_t gds_save_mask_hi;  /* ordinal123 */
4597 	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4598 	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4599 	uint32_t reserved44;  /* ordinal126 */
4600 	uint32_t reserved45;  /* ordinal127 */
4601 	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4602 	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4603 	uint32_t cp_hqd_active;  /* ordinal130 */
4604 	uint32_t cp_hqd_vmid;  /* ordinal131 */
4605 	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4606 	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4607 	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4608 	uint32_t cp_hqd_quantum;  /* ordinal135 */
4609 	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4610 	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4611 	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4612 	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4613 	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4614 	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4615 	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4616 	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4617 	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4618 	uint32_t cp_hqd_pq_control;  /* ordinal145 */
4619 	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4620 	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4621 	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4622 	uint32_t cp_hqd_ib_control;  /* ordinal149 */
4623 	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4624 	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4625 	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4626 	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4627 	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4628 	uint32_t cp_hqd_msg_type;  /* ordinal155 */
4629 	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4630 	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4631 	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4632 	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4633 	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4634 	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4635 	uint32_t cp_mqd_control;  /* ordinal162 */
4636 	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4637 	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4638 	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4639 	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4640 	uint32_t cp_hqd_eop_control;  /* ordinal167 */
4641 	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4642 	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4643 	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4644 	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4645 	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4646 	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4647 	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4648 	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4649 	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4650 	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4651 	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4652 	uint32_t cp_hqd_error;  /* ordinal179 */
4653 	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4654 	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4655 	uint32_t reserved46;  /* ordinal182 */
4656 	uint32_t reserved47;  /* ordinal183 */
4657 	uint32_t reserved48;  /* ordinal184 */
4658 	uint32_t reserved49;  /* ordinal185 */
4659 	uint32_t reserved50;  /* ordinal186 */
4660 	uint32_t reserved51;  /* ordinal187 */
4661 	uint32_t reserved52;  /* ordinal188 */
4662 	uint32_t reserved53;  /* ordinal189 */
4663 	uint32_t reserved54;  /* ordinal190 */
4664 	uint32_t reserved55;  /* ordinal191 */
4665 	uint32_t iqtimer_pkt_header;  /* ordinal192 */
4666 	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4667 	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4668 	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4669 	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4670 	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4671 	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4672 	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4673 	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4674 	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4675 	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4676 	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4677 	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4678 	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4679 	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4680 	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4681 	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4682 	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4683 	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4684 	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4685 	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4686 	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4687 	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4688 	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4689 	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4690 	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4691 	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4692 	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4693 	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4694 	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4695 	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4696 	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4697 	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4698 	uint32_t reserved56;  /* ordinal225 */
4699 	uint32_t reserved57;  /* ordinal226 */
4700 	uint32_t reserved58;  /* ordinal227 */
4701 	uint32_t set_resources_header;  /* ordinal228 */
4702 	uint32_t set_resources_dw1;  /* ordinal229 */
4703 	uint32_t set_resources_dw2;  /* ordinal230 */
4704 	uint32_t set_resources_dw3;  /* ordinal231 */
4705 	uint32_t set_resources_dw4;  /* ordinal232 */
4706 	uint32_t set_resources_dw5;  /* ordinal233 */
4707 	uint32_t set_resources_dw6;  /* ordinal234 */
4708 	uint32_t set_resources_dw7;  /* ordinal235 */
4709 	uint32_t reserved59;  /* ordinal236 */
4710 	uint32_t reserved60;  /* ordinal237 */
4711 	uint32_t reserved61;  /* ordinal238 */
4712 	uint32_t reserved62;  /* ordinal239 */
4713 	uint32_t reserved63;  /* ordinal240 */
4714 	uint32_t reserved64;  /* ordinal241 */
4715 	uint32_t reserved65;  /* ordinal242 */
4716 	uint32_t reserved66;  /* ordinal243 */
4717 	uint32_t reserved67;  /* ordinal244 */
4718 	uint32_t reserved68;  /* ordinal245 */
4719 	uint32_t reserved69;  /* ordinal246 */
4720 	uint32_t reserved70;  /* ordinal247 */
4721 	uint32_t reserved71;  /* ordinal248 */
4722 	uint32_t reserved72;  /* ordinal249 */
4723 	uint32_t reserved73;  /* ordinal250 */
4724 	uint32_t reserved74;  /* ordinal251 */
4725 	uint32_t reserved75;  /* ordinal252 */
4726 	uint32_t reserved76;  /* ordinal253 */
4727 	uint32_t reserved77;  /* ordinal254 */
4728 	uint32_t reserved78;  /* ordinal255 */
4729 
4730 	uint32_t reserved_t[256]; /* 256-dword scratch buffer reserved for the CP ucode */
4731 };
4732 
4733 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4734 {
4735 	int i, r;
4736 
4737 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4738 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4739 
4740 		if (ring->mqd_obj) {
4741 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4742 			if (unlikely(r != 0))
4743 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4744 
4745 			amdgpu_bo_unpin(ring->mqd_obj);
4746 			amdgpu_bo_unreserve(ring->mqd_obj);
4747 
4748 			amdgpu_bo_unref(&ring->mqd_obj);
4749 			ring->mqd_obj = NULL;
4750 		}
4751 	}
4752 }
4753 
4754 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4755 {
4756 	int r, i, j;
4757 	u32 tmp;
4758 	bool use_doorbell = true;
4759 	u64 hqd_gpu_addr;
4760 	u64 mqd_gpu_addr;
4761 	u64 eop_gpu_addr;
4762 	u64 wb_gpu_addr;
4763 	u32 *buf;
4764 	struct vi_mqd *mqd;
4765 
4766 	/* init the pipes */
4767 	mutex_lock(&adev->srbm_mutex);
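	/* the first four pipe indices map to ME 1 (MEC1), the rest to
	 * ME 2 (MEC2); program each pipe's EOP buffer address and size */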
4768 	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4769 		int me = (i < 4) ? 1 : 2;
4770 		int pipe = (i < 4) ? i : (i - 4);
4771 
4772 		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4773 		eop_gpu_addr >>= 8;
4774 
4775 		vi_srbm_select(adev, me, pipe, 0, 0);
4776 
4777 		/* write the EOP addr */
4778 		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4779 		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4780 
4781 		/* set the VMID assigned */
4782 		WREG32(mmCP_HQD_VMID, 0);
4783 
4784 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4785 		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4786 		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4787 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
4788 		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4789 	}
4790 	vi_srbm_select(adev, 0, 0, 0, 0);
4791 	mutex_unlock(&adev->srbm_mutex);
4792 
4793 	/* init all of the compute queues */
4794 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4795 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4796 
4797 		if (ring->mqd_obj == NULL) {
4798 			r = amdgpu_bo_create(adev,
4799 					     sizeof(struct vi_mqd),
4800 					     PAGE_SIZE, true,
4801 					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4802 					     NULL, &ring->mqd_obj);
4803 			if (r) {
4804 				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4805 				return r;
4806 			}
4807 		}
4808 
4809 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4810 		if (unlikely(r != 0)) {
4811 			gfx_v8_0_cp_compute_fini(adev);
4812 			return r;
4813 		}
4814 		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4815 				  &mqd_gpu_addr);
4816 		if (r) {
4817 			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4818 			gfx_v8_0_cp_compute_fini(adev);
4819 			return r;
4820 		}
4821 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4822 		if (r) {
4823 			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4824 			gfx_v8_0_cp_compute_fini(adev);
4825 			return r;
4826 		}
4827 
4828 		/* init the mqd struct */
4829 		memset(buf, 0, sizeof(struct vi_mqd));
4830 
4831 		mqd = (struct vi_mqd *)buf;
4832 		mqd->header = 0xC0310800;
4833 		mqd->compute_pipelinestat_enable = 0x00000001;
4834 		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4835 		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4836 		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4837 		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4838 		mqd->compute_misc_reserved = 0x00000003;
4839 
4840 		mutex_lock(&adev->srbm_mutex);
4841 		vi_srbm_select(adev, ring->me,
4842 			       ring->pipe,
4843 			       ring->queue, 0);
4844 
4845 		/* disable wptr polling */
4846 		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4847 		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4848 		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4849 
4850 		mqd->cp_hqd_eop_base_addr_lo =
4851 			RREG32(mmCP_HQD_EOP_BASE_ADDR);
4852 		mqd->cp_hqd_eop_base_addr_hi =
4853 			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4854 
4855 		/* enable doorbell? */
4856 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4857 		if (use_doorbell) {
4858 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4859 		} else {
4860 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4861 		}
4862 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4863 		mqd->cp_hqd_pq_doorbell_control = tmp;
4864 
4865 		/* disable the queue if it's active */
4866 		mqd->cp_hqd_dequeue_request = 0;
4867 		mqd->cp_hqd_pq_rptr = 0;
4868 		mqd->cp_hqd_pq_wptr = 0;
4869 		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4870 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4871 			for (j = 0; j < adev->usec_timeout; j++) {
4872 				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4873 					break;
4874 				udelay(1);
4875 			}
4876 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4877 			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4878 			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4879 		}
4880 
4881 		/* set the pointer to the MQD */
4882 		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4883 		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4884 		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4885 		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4886 
4887 		/* set MQD vmid to 0 */
4888 		tmp = RREG32(mmCP_MQD_CONTROL);
4889 		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4890 		WREG32(mmCP_MQD_CONTROL, tmp);
4891 		mqd->cp_mqd_control = tmp;
4892 
4893 		/* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
4894 		hqd_gpu_addr = ring->gpu_addr >> 8;
4895 		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4896 		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4897 		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4898 		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4899 
4900 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4901 		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4902 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4903 				    (order_base_2(ring->ring_size / 4) - 1));
4904 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4905 			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4906 #ifdef __BIG_ENDIAN
4907 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4908 #endif
4909 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4910 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4911 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4912 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4913 		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4914 		mqd->cp_hqd_pq_control = tmp;
4915 
4916 		/* set the writeback address whether it's enabled or not */
4917 		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4918 		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4919 		mqd->cp_hqd_pq_rptr_report_addr_hi =
4920 			upper_32_bits(wb_gpu_addr) & 0xffff;
4921 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4922 		       mqd->cp_hqd_pq_rptr_report_addr_lo);
4923 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4924 		       mqd->cp_hqd_pq_rptr_report_addr_hi);
4925 
4926 		/* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4927 		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4928 		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4929 		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4930 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4931 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4932 		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4933 
4934 		/* enable the doorbell if requested */
4935 		if (use_doorbell) {
4936 			if ((adev->asic_type == CHIP_CARRIZO) ||
4937 			    (adev->asic_type == CHIP_FIJI) ||
4938 			    (adev->asic_type == CHIP_STONEY) ||
4939 			    (adev->asic_type == CHIP_POLARIS11) ||
4940 			    (adev->asic_type == CHIP_POLARIS10)) {
4941 				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4942 				       AMDGPU_DOORBELL_KIQ << 2);
4943 				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4944 				       AMDGPU_DOORBELL_MEC_RING7 << 2);
4945 			}
4946 			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4947 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4948 					    DOORBELL_OFFSET, ring->doorbell_index);
4949 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4950 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4951 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4952 			mqd->cp_hqd_pq_doorbell_control = tmp;
4953 
4954 		} else {
4955 			mqd->cp_hqd_pq_doorbell_control = 0;
4956 		}
4957 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4958 		       mqd->cp_hqd_pq_doorbell_control);
4959 
4960 		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4961 		ring->wptr = 0;
4962 		mqd->cp_hqd_pq_wptr = ring->wptr;
4963 		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4964 		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4965 
4966 		/* set the vmid for the queue */
4967 		mqd->cp_hqd_vmid = 0;
4968 		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4969 
4970 		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4971 		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4972 		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4973 		mqd->cp_hqd_persistent_state = tmp;
4974 		if (adev->asic_type == CHIP_STONEY ||
4975 			adev->asic_type == CHIP_POLARIS11 ||
4976 			adev->asic_type == CHIP_POLARIS10) {
4977 			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4978 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4979 			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4980 		}
4981 
4982 		/* activate the queue */
4983 		mqd->cp_hqd_active = 1;
4984 		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4985 
4986 		vi_srbm_select(adev, 0, 0, 0, 0);
4987 		mutex_unlock(&adev->srbm_mutex);
4988 
4989 		amdgpu_bo_kunmap(ring->mqd_obj);
4990 		amdgpu_bo_unreserve(ring->mqd_obj);
4991 	}
4992 
4993 	if (use_doorbell) {
4994 		tmp = RREG32(mmCP_PQ_STATUS);
4995 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4996 		WREG32(mmCP_PQ_STATUS, tmp);
4997 	}
4998 
4999 	gfx_v8_0_cp_compute_enable(adev, true);
5000 
5001 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5002 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5003 
5004 		ring->ready = true;
5005 		r = amdgpu_ring_test_ring(ring);
5006 		if (r)
5007 			ring->ready = false;
5008 	}
5009 
5010 	return 0;
5011 }
5012 
5013 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5014 {
5015 	int r;
5016 
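	/* keep the GUI idle interrupt off on dGPUs while loading; make
	 * sure the CP microcode is in place (legacy load or SMU-loaded),
	 * then bring up the gfx ring followed by the compute rings */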
5017 	if (!(adev->flags & AMD_IS_APU))
5018 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5019 
5020 	if (!adev->pp_enabled) {
5021 		if (!adev->firmware.smu_load) {
5022 			/* legacy firmware loading */
5023 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
5024 			if (r)
5025 				return r;
5026 
5027 			r = gfx_v8_0_cp_compute_load_microcode(adev);
5028 			if (r)
5029 				return r;
5030 		} else {
5031 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5032 							AMDGPU_UCODE_ID_CP_CE);
5033 			if (r)
5034 				return -EINVAL;
5035 
5036 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5037 							AMDGPU_UCODE_ID_CP_PFP);
5038 			if (r)
5039 				return -EINVAL;
5040 
5041 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5042 							AMDGPU_UCODE_ID_CP_ME);
5043 			if (r)
5044 				return -EINVAL;
5045 
5046 			if (adev->asic_type == CHIP_TOPAZ) {
5047 				r = gfx_v8_0_cp_compute_load_microcode(adev);
5048 				if (r)
5049 					return r;
5050 			} else {
5051 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5052 										 AMDGPU_UCODE_ID_CP_MEC1);
5053 				if (r)
5054 					return -EINVAL;
5055 			}
5056 		}
5057 	}
5058 
5059 	r = gfx_v8_0_cp_gfx_resume(adev);
5060 	if (r)
5061 		return r;
5062 
5063 	r = gfx_v8_0_cp_compute_resume(adev);
5064 	if (r)
5065 		return r;
5066 
5067 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5068 
5069 	return 0;
5070 }
5071 
5072 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5073 {
5074 	gfx_v8_0_cp_gfx_enable(adev, enable);
5075 	gfx_v8_0_cp_compute_enable(adev, enable);
5076 }
5077 
5078 static int gfx_v8_0_hw_init(void *handle)
5079 {
5080 	int r;
5081 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5082 
5083 	gfx_v8_0_init_golden_registers(adev);
5084 	gfx_v8_0_gpu_init(adev);
5085 
5086 	r = gfx_v8_0_rlc_resume(adev);
5087 	if (r)
5088 		return r;
5089 
5090 	r = gfx_v8_0_cp_resume(adev);
5091 
5092 	return r;
5093 }
5094 
5095 static int gfx_v8_0_hw_fini(void *handle)
5096 {
5097 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5098 
5099 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5100 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5101 	gfx_v8_0_cp_enable(adev, false);
5102 	gfx_v8_0_rlc_stop(adev);
5103 	gfx_v8_0_cp_compute_fini(adev);
5104 
5105 	amdgpu_set_powergating_state(adev,
5106 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5107 
5108 	return 0;
5109 }
5110 
5111 static int gfx_v8_0_suspend(void *handle)
5112 {
5113 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5114 
5115 	return gfx_v8_0_hw_fini(adev);
5116 }
5117 
5118 static int gfx_v8_0_resume(void *handle)
5119 {
5120 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5121 
5122 	return gfx_v8_0_hw_init(adev);
5123 }
5124 
5125 static bool gfx_v8_0_is_idle(void *handle)
5126 {
5127 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5128 
5129 	return !REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE);
5133 }
5134 
5135 static int gfx_v8_0_wait_for_idle(void *handle)
5136 {
5137 	unsigned i;
5138 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5139 
5140 	for (i = 0; i < adev->usec_timeout; i++) {
5141 		if (gfx_v8_0_is_idle(handle))
5142 			return 0;
5143 
5144 		udelay(1);
5145 	}
5146 	return -ETIMEDOUT;
5147 }
5148 
5149 static bool gfx_v8_0_check_soft_reset(void *handle)
5150 {
5151 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5152 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5153 	u32 tmp;
5154 
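	/* translate busy bits in the GRBM/SRBM status registers into the
	 * per-block soft reset requests cached in adev->gfx */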
5155 	/* GRBM_STATUS */
5156 	tmp = RREG32(mmGRBM_STATUS);
5157 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5158 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5159 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5160 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5161 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5162 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5163 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5164 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5165 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5166 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5167 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5168 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5169 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5170 	}
5171 
5172 	/* GRBM_STATUS2 */
5173 	tmp = RREG32(mmGRBM_STATUS2);
5174 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5175 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5176 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5177 
5178 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5179 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5180 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5181 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5182 						SOFT_RESET_CPF, 1);
5183 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5184 						SOFT_RESET_CPC, 1);
5185 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5186 						SOFT_RESET_CPG, 1);
5187 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5188 						SOFT_RESET_GRBM, 1);
5189 	}
5190 
5191 	/* SRBM_STATUS */
5192 	tmp = RREG32(mmSRBM_STATUS);
5193 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5194 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5195 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5196 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5197 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5198 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5199 
5200 	if (grbm_soft_reset || srbm_soft_reset) {
5201 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5202 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5203 		return true;
5204 	} else {
5205 		adev->gfx.grbm_soft_reset = 0;
5206 		adev->gfx.srbm_soft_reset = 0;
5207 		return false;
5208 	}
5209 }
5210 
5211 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5212 				  struct amdgpu_ring *ring)
5213 {
5214 	int i;
5215 
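	/* select this ring's HQD; if it is still active, issue a dequeue
	 * request and poll until the queue goes idle or we time out */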
5216 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5217 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5218 		u32 tmp;
5219 		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5220 		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5221 				    DEQUEUE_REQ, 2);
5222 		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5223 		for (i = 0; i < adev->usec_timeout; i++) {
5224 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5225 				break;
5226 			udelay(1);
5227 		}
5228 	}
5229 }
5230 
5231 static int gfx_v8_0_pre_soft_reset(void *handle)
5232 {
5233 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5234 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5235 
5236 	if ((!adev->gfx.grbm_soft_reset) &&
5237 	    (!adev->gfx.srbm_soft_reset))
5238 		return 0;
5239 
5240 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5241 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5242 
5243 	/* stop the rlc */
5244 	gfx_v8_0_rlc_stop(adev);
5245 
5246 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5247 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5248 		/* Disable GFX parsing/prefetching */
5249 		gfx_v8_0_cp_gfx_enable(adev, false);
5250 
5251 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5252 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5253 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5254 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5255 		int i;
5256 
5257 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5258 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5259 
5260 			gfx_v8_0_inactive_hqd(adev, ring);
5261 		}
5262 		/* Disable MEC parsing/prefetching */
5263 		gfx_v8_0_cp_compute_enable(adev, false);
5264 	}
5265 
5266 	return 0;
5267 }
5268 
5269 static int gfx_v8_0_soft_reset(void *handle)
5270 {
5271 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5272 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5273 	u32 tmp;
5274 
5275 	if ((!adev->gfx.grbm_soft_reset) &&
5276 	    (!adev->gfx.srbm_soft_reset))
5277 		return 0;
5278 
5279 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5280 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5281 
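	/* stall and clear GFX traffic in the memory controller (GMCON)
	 * while the GRBM/SRBM resets below are pulsed */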
5282 	if (grbm_soft_reset || srbm_soft_reset) {
5283 		tmp = RREG32(mmGMCON_DEBUG);
5284 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5285 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5286 		WREG32(mmGMCON_DEBUG, tmp);
5287 		udelay(50);
5288 	}
5289 
5290 	if (grbm_soft_reset) {
5291 		tmp = RREG32(mmGRBM_SOFT_RESET);
5292 		tmp |= grbm_soft_reset;
5293 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5294 		WREG32(mmGRBM_SOFT_RESET, tmp);
5295 		tmp = RREG32(mmGRBM_SOFT_RESET);
5296 
5297 		udelay(50);
5298 
5299 		tmp &= ~grbm_soft_reset;
5300 		WREG32(mmGRBM_SOFT_RESET, tmp);
5301 		tmp = RREG32(mmGRBM_SOFT_RESET);
5302 	}
5303 
5304 	if (srbm_soft_reset) {
5305 		tmp = RREG32(mmSRBM_SOFT_RESET);
5306 		tmp |= srbm_soft_reset;
5307 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5308 		WREG32(mmSRBM_SOFT_RESET, tmp);
5309 		tmp = RREG32(mmSRBM_SOFT_RESET);
5310 
5311 		udelay(50);
5312 
5313 		tmp &= ~srbm_soft_reset;
5314 		WREG32(mmSRBM_SOFT_RESET, tmp);
5315 		tmp = RREG32(mmSRBM_SOFT_RESET);
5316 	}
5317 
5318 	if (grbm_soft_reset || srbm_soft_reset) {
5319 		tmp = RREG32(mmGMCON_DEBUG);
5320 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5321 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5322 		WREG32(mmGMCON_DEBUG, tmp);
5323 	}
5324 
5325 	/* Wait a little for things to settle down */
5326 	udelay(50);
5327 
5328 	return 0;
5329 }
5330 
5331 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5332 			      struct amdgpu_ring *ring)
5333 {
5334 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5335 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5336 	WREG32(mmCP_HQD_PQ_RPTR, 0);
5337 	WREG32(mmCP_HQD_PQ_WPTR, 0);
5338 	vi_srbm_select(adev, 0, 0, 0, 0);
5339 }
5340 
5341 static int gfx_v8_0_post_soft_reset(void *handle)
5342 {
5343 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5344 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5345 
5346 	if ((!adev->gfx.grbm_soft_reset) &&
5347 	    (!adev->gfx.srbm_soft_reset))
5348 		return 0;
5349 
5350 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5351 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5352 
5353 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5354 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5355 		gfx_v8_0_cp_gfx_resume(adev);
5356 
5357 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5358 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5359 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5360 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5361 		int i;
5362 
5363 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5364 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5365 
5366 			gfx_v8_0_init_hqd(adev, ring);
5367 		}
5368 		gfx_v8_0_cp_compute_resume(adev);
5369 	}
5370 	gfx_v8_0_rlc_start(adev);
5371 
5372 	return 0;
5373 }
5374 
5375 /**
5376  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5377  *
5378  * @adev: amdgpu_device pointer
5379  *
5380  * Fetches a snapshot of the GPU clock counter.
5381  * Returns the 64-bit clock counter value.
5382  */
5383 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5384 {
5385 	uint64_t clock;
5386 
5387 	mutex_lock(&adev->gfx.gpu_clock_mutex);
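	/* latch the counter first so the LSB/MSB reads are coherent */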
5388 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5389 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5390 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5391 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5392 	return clock;
5393 }
5394 
5395 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5396 					  uint32_t vmid,
5397 					  uint32_t gds_base, uint32_t gds_size,
5398 					  uint32_t gws_base, uint32_t gws_size,
5399 					  uint32_t oa_base, uint32_t oa_size)
5400 {
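	/* convert the byte-based bases/sizes into the hardware's GDS, GWS
	 * and OA allocation units before programming this vmid's partition
	 * registers with WRITE_DATA packets */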
5401 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5402 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5403 
5404 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5405 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5406 
5407 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5408 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5409 
5410 	/* GDS Base */
5411 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5412 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5413 				WRITE_DATA_DST_SEL(0)));
5414 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5415 	amdgpu_ring_write(ring, 0);
5416 	amdgpu_ring_write(ring, gds_base);
5417 
5418 	/* GDS Size */
5419 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5420 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5421 				WRITE_DATA_DST_SEL(0)));
5422 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5423 	amdgpu_ring_write(ring, 0);
5424 	amdgpu_ring_write(ring, gds_size);
5425 
5426 	/* GWS */
5427 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5428 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5429 				WRITE_DATA_DST_SEL(0)));
5430 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5431 	amdgpu_ring_write(ring, 0);
5432 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5433 
5434 	/* OA */
5435 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5436 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5437 				WRITE_DATA_DST_SEL(0)));
5438 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5439 	amdgpu_ring_write(ring, 0);
5440 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5441 }
5442 
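/* Read one dword from the per-wave SQ indirect register space: program
 * SQ_IND_INDEX with the SIMD/wave/register selector (with FORCE_READ
 * set) and fetch the value back through SQ_IND_DATA.
 */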
5443 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5444 {
5445 	WREG32(mmSQ_IND_INDEX,
5446 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5447 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5448 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5449 		(SQ_IND_INDEX__FORCE_READ_MASK));
5450 	return RREG32(mmSQ_IND_DATA);
5451 }
5452 
5453 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5454 {
5455 	/* type 0 wave data */
5456 	dst[(*no_fields)++] = 0;
5457 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5458 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5459 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5460 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5461 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5462 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5463 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5464 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5465 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5466 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5467 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5468 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5469 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5470 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5471 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5472 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5473 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5474 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5475 }
5476 
5477 
5478 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5479 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5480 	.select_se_sh = &gfx_v8_0_select_se_sh,
5481 	.read_wave_data = &gfx_v8_0_read_wave_data,
5482 };
5483 
5484 static int gfx_v8_0_early_init(void *handle)
5485 {
5486 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5487 
5488 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5489 	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5490 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5491 	gfx_v8_0_set_ring_funcs(adev);
5492 	gfx_v8_0_set_irq_funcs(adev);
5493 	gfx_v8_0_set_gds_init(adev);
5494 	gfx_v8_0_set_rlc_funcs(adev);
5495 
5496 	return 0;
5497 }
5498 
5499 static int gfx_v8_0_late_init(void *handle)
5500 {
5501 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5502 	int r;
5503 
5504 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5505 	if (r)
5506 		return r;
5507 
5508 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5509 	if (r)
5510 		return r;
5511 
5512 	/* requires IBs, so run in late init once the IB pool is initialized */
5513 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5514 	if (r)
5515 		return r;
5516 
5517 	amdgpu_set_powergating_state(adev,
5518 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5519 
5520 	return 0;
5521 }
5522 
5523 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5524 						       bool enable)
5525 {
5526 	if (adev->asic_type == CHIP_POLARIS11)
5527 		/* Send msg to SMU via Powerplay */
5528 		amdgpu_set_powergating_state(adev,
5529 					     AMD_IP_BLOCK_TYPE_SMC,
5530 					     enable ?
5531 					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5532 
5533 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5534 }
5535 
5536 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5537 							bool enable)
5538 {
5539 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5540 }
5541 
5542 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5543 		bool enable)
5544 {
5545 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5546 }
5547 
5548 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5549 					  bool enable)
5550 {
5551 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5552 }
5553 
5554 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5555 						bool enable)
5556 {
5557 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5558 
5559 	/* Read any GFX register to wake up GFX. */
5560 	if (!enable)
5561 		RREG32(mmDB_RENDER_CONTROL);
5562 }
5563 
5564 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5565 					  bool enable)
5566 {
5567 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5568 		cz_enable_gfx_cg_power_gating(adev, true);
5569 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5570 			cz_enable_gfx_pipeline_power_gating(adev, true);
5571 	} else {
5572 		cz_enable_gfx_cg_power_gating(adev, false);
5573 		cz_enable_gfx_pipeline_power_gating(adev, false);
5574 	}
5575 }
5576 
5577 static int gfx_v8_0_set_powergating_state(void *handle,
5578 					  enum amd_powergating_state state)
5579 {
5580 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5581 	bool enable = (state == AMD_PG_STATE_GATE);
5582 
5583 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5584 		return 0;
5585 
5586 	switch (adev->asic_type) {
5587 	case CHIP_CARRIZO:
5588 	case CHIP_STONEY:
5589 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5590 			cz_update_gfx_cg_power_gating(adev, enable);
5591 
5592 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5593 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5594 		else
5595 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5596 
5597 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5598 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5599 		else
5600 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5601 		break;
5602 	case CHIP_POLARIS11:
5603 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5604 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5605 		else
5606 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5607 
5608 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5609 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5610 		else
5611 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5612 
5613 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5614 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5615 		else
5616 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5617 		break;
5618 	default:
5619 		break;
5620 	}
5621 
5622 	return 0;
5623 }
5624 
5625 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5626 				     uint32_t reg_addr, uint32_t cmd)
5627 {
5628 	uint32_t data;
5629 
5630 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5631 
5632 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5633 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5634 
5635 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5636 	if (adev->asic_type == CHIP_STONEY)
5637 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5638 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5639 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5640 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5641 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5642 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5643 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5644 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5645 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5646 	else
5647 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5648 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5649 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5650 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5651 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5652 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5653 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5654 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5655 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5656 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5657 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5658 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5659 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5660 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5661 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5662 
5663 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5664 }
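
/*
 * Note: gfx_v8_0_send_serdes_cmd() is a broadcast: selecting SE/SH
 * 0xffffffff targets every shader engine/array, the CU and non-CU
 * master masks of 0xffffffff address all serdes masters, and
 * RLC_SERDES_WR_CTRL then carries the command value (BPM_DATA), the
 * BPM register offset (REG_ADDR) and the broadcast BPM address (0xff).
 */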
5665 
5666 #define MSG_ENTER_RLC_SAFE_MODE     1
5667 #define MSG_EXIT_RLC_SAFE_MODE      0
5668 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5669 #define RLC_GPR_REG2__REQ__SHIFT 0
5670 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5671 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5672 
5673 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5674 {
5675 	u32 data = 0;
5676 	unsigned i;
5677 
5678 	data = RREG32(mmRLC_CNTL);
5679 	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5680 		return;
5681 
5682 	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5683 	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5684 			       AMD_PG_SUPPORT_GFX_DMG))) {
5685 		data |= RLC_GPR_REG2__REQ_MASK;
5686 		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5687 		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5688 		WREG32(mmRLC_GPR_REG2, data);
5689 
5690 		for (i = 0; i < adev->usec_timeout; i++) {
5691 			if ((RREG32(mmRLC_GPM_STAT) &
5692 			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5693 			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5694 			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5695 			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5696 				break;
5697 			udelay(1);
5698 		}
5699 
5700 		for (i = 0; i < adev->usec_timeout; i++) {
5701 			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5702 				break;
5703 			udelay(1);
5704 		}
5705 		adev->gfx.rlc.in_safe_mode = true;
5706 	}
5707 }
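
/*
 * Worked example of the RLC_GPR_REG2 handshake above, with MESSAGE at
 * bits [4:1] and REQ at bit 0:
 *
 *   enter: REQ | (MSG_ENTER_RLC_SAFE_MODE << 1) = 0x1 | 0x2 = 0x3
 *   exit:  REQ | (MSG_EXIT_RLC_SAFE_MODE << 1)  = 0x1 | 0x0 = 0x1
 *
 * The RLC firmware acknowledges by clearing REQ, which the second poll
 * loop waits on (bounded by adev->usec_timeout).  Note that data is
 * seeded from the RLC_CNTL read used for the enable check, so bits
 * outside REQ/MESSAGE carry over into the RLC_GPR_REG2 write.
 */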
5708 
5709 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5710 {
5711 	u32 data;
5712 	unsigned i;
5713 
5714 	data = RREG32(mmRLC_CNTL);
5715 	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5716 		return;
5717 
5718 	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5719 	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5720 			       AMD_PG_SUPPORT_GFX_DMG))) {
5721 		data |= RLC_GPR_REG2__REQ_MASK;
5722 		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5723 		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5724 		WREG32(mmRLC_GPR_REG2, data);
5725 		adev->gfx.rlc.in_safe_mode = false;
5726 	}
5727 
5728 	for (i = 0; i < adev->usec_timeout; i++) {
5729 		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5730 			break;
5731 		udelay(1);
5732 	}
5733 }
5734 
5735 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5736 {
5737 	u32 data;
5738 	unsigned i;
5739 
5740 	data = RREG32(mmRLC_CNTL);
5741 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5742 		return;
5743 
5744 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5745 		data |= RLC_SAFE_MODE__CMD_MASK;
5746 		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5747 		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5748 		WREG32(mmRLC_SAFE_MODE, data);
5749 
5750 		for (i = 0; i < adev->usec_timeout; i++) {
5751 			if ((RREG32(mmRLC_GPM_STAT) &
5752 			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5753 			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5754 			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5755 			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5756 				break;
5757 			udelay(1);
5758 		}
5759 
5760 		for (i = 0; i < adev->usec_timeout; i++) {
5761 			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5762 				break;
5763 			udelay(1);
5764 		}
5765 		adev->gfx.rlc.in_safe_mode = true;
5766 	}
5767 }
5768 
5769 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5770 {
5771 	u32 data = 0;
5772 	unsigned i;
5773 
5774 	data = RREG32(mmRLC_CNTL);
5775 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5776 		return;
5777 
5778 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5779 		if (adev->gfx.rlc.in_safe_mode) {
5780 			data |= RLC_SAFE_MODE__CMD_MASK;
5781 			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5782 			WREG32(mmRLC_SAFE_MODE, data);
5783 			adev->gfx.rlc.in_safe_mode = false;
5784 		}
5785 	}
5786 
5787 	for (i = 0; i < adev->usec_timeout; i++) {
5788 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5789 			break;
5790 		udelay(1);
5791 	}
5792 }
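
/*
 * Note: the two safe-mode implementations differ mainly in the mailbox
 * register: CZ/Stoney signal the RLC through RLC_GPR_REG2
 * (REQ/MESSAGE), while Topaz uses RLC_SAFE_MODE (CMD/MESSAGE).  Both
 * poll RLC_GPM_STAT on entry until the GFX clock and power status bits
 * are set, then wait for the request/command bit to clear as the
 * firmware's acknowledgement.
 */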
5793 
5794 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5795 {
5796 	adev->gfx.rlc.in_safe_mode = true;
5797 }
5798 
5799 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5800 {
5801 	adev->gfx.rlc.in_safe_mode = false;
5802 }
5803 
5804 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5805 	.enter_safe_mode = cz_enter_rlc_safe_mode,
5806 	.exit_safe_mode = cz_exit_rlc_safe_mode
5807 };
5808 
5809 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5810 	.enter_safe_mode = iceland_enter_rlc_safe_mode,
5811 	.exit_safe_mode = iceland_exit_rlc_safe_mode
5812 };
5813 
5814 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5815 	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5816 	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5817 };
5818 
5819 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5820 						      bool enable)
5821 {
5822 	uint32_t temp, data;
5823 
5824 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5825 
5826 	/* MGCG is disabled by HW by default */
5827 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5828 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5829 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5830 				/* 1 - RLC memory Light sleep */
5831 				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5832 
5833 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5834 				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5835 		}
5836 
5837 		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5838 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5839 		if (adev->flags & AMD_IS_APU)
5840 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5841 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5842 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5843 		else
5844 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5845 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5846 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5847 				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5848 
5849 		if (temp != data)
5850 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5851 
5852 		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5853 		gfx_v8_0_wait_for_rlc_serdes(adev);
5854 
5855 		/* 5 - clear mgcg override */
5856 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5857 
5858 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5859 			/* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
5860 			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5861 			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5862 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5863 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5864 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5865 			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5866 			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5867 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5868 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5869 			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5870 			if (temp != data)
5871 				WREG32(mmCGTS_SM_CTRL_REG, data);
5872 		}
5873 		udelay(50);
5874 
5875 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5876 		gfx_v8_0_wait_for_rlc_serdes(adev);
5877 	} else {
5878 		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5879 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5880 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5881 				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5882 				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5883 				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5884 		if (temp != data)
5885 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5886 
5887 		/* 2 - disable MGLS in RLC */
5888 		data = RREG32(mmRLC_MEM_SLP_CNTL);
5889 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5890 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5891 			WREG32(mmRLC_MEM_SLP_CNTL, data);
5892 		}
5893 
5894 		/* 3 - disable MGLS in CP */
5895 		data = RREG32(mmCP_MEM_SLP_CNTL);
5896 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5897 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5898 			WREG32(mmCP_MEM_SLP_CNTL, data);
5899 		}
5900 
5901 		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5902 		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5903 		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5904 				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5905 		if (temp != data)
5906 			WREG32(mmCGTS_SM_CTRL_REG, data);
5907 
5908 		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5909 		gfx_v8_0_wait_for_rlc_serdes(adev);
5910 
5911 		/* 6 - set mgcg override */
5912 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5913 
5914 		udelay(50);
5915 
5916 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5917 		gfx_v8_0_wait_for_rlc_serdes(adev);
5918 	}
5919 
5920 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5921 }
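
/*
 * Note: the temp/data idiom above (read once, modify a copy, write
 * back only "if (temp != data)") skips redundant MMIO writes; register
 * traffic to the RLC is comparatively expensive and these paths run on
 * every gating state change.
 */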
5922 
5923 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5924 						      bool enable)
5925 {
5926 	uint32_t temp, temp1, data, data1;
5927 
5928 	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5929 
5930 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5931 
5932 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5933 		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5934 		 * Cmp_busy/GFX_Idle interrupts
5935 		 */
5936 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5937 
5938 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5939 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5940 		if (temp1 != data1)
5941 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5942 
5943 		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5944 		gfx_v8_0_wait_for_rlc_serdes(adev);
5945 
5946 		/* 3 - clear cgcg override */
5947 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5948 
5949 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5950 		gfx_v8_0_wait_for_rlc_serdes(adev);
5951 
5952 		/* 4 - write cmd to set CGLS */
5953 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5954 
5955 		/* 5 - enable cgcg */
5956 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5957 
5958 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5959 			/* enable cgls */
5960 			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5961 
5962 			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5963 			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5964 
5965 			if (temp1 != data1)
5966 				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5967 		} else {
5968 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5969 		}
5970 
5971 		if (temp != data)
5972 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5973 	} else {
5974 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5975 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5976 
5977 		/* TEST CGCG */
5978 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5979 		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5980 				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5981 		if (temp1 != data1)
5982 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5983 
5984 		/* read gfx register to wake up cgcg */
5985 		RREG32(mmCB_CGTT_SCLK_CTRL);
5986 		RREG32(mmCB_CGTT_SCLK_CTRL);
5987 		RREG32(mmCB_CGTT_SCLK_CTRL);
5988 		RREG32(mmCB_CGTT_SCLK_CTRL);
5989 
5990 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5991 		gfx_v8_0_wait_for_rlc_serdes(adev);
5992 
5993 		/* write cmd to Set CGCG Override */
5994 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5995 
5996 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5997 		gfx_v8_0_wait_for_rlc_serdes(adev);
5998 
5999 		/* write cmd to Clear CGLS */
6000 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6001 
6002 		/* disable cgcg, cgls should be disabled too. */
6003 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6004 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6005 		if (temp != data)
6006 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6007 	}
6008 
6009 	gfx_v8_0_wait_for_rlc_serdes(adev);
6010 
6011 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
6012 }
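
/*
 * Note: enabling coarse-grain gating clears the CGCG/CGLS overrides
 * (both the RLC_CGTT_MGCG_OVERRIDE register and the BPM serdes path)
 * before setting CGCG_EN/CGLS_EN in RLC_CGCG_CGLS_CTRL; disabling
 * reverses the order, restoring the overrides first and clearing the
 * enable bits last, with serdes-idle waits in between.
 */
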
6013 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6014 					    bool enable)
6015 {
6016 	if (enable) {
6017 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6018 		 * ===  MGCG + MGLS + TS(CG/LS) ===
6019 		 */
6020 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6021 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6022 	} else {
6023 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6024 		 * ===  CGCG + CGLS ===
6025 		 */
6026 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6027 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6028 	}
6029 	return 0;
6030 }
6031 
6032 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6033 					  enum amd_clockgating_state state)
6034 {
6035 	uint32_t msg_id, pp_state;
6036 	void *pp_handle = adev->powerplay.pp_handle;
6037 
6038 	if (state == AMD_CG_STATE_UNGATE)
6039 		pp_state = 0;
6040 	else
6041 		pp_state = PP_STATE_CG | PP_STATE_LS;
6042 
6043 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6044 			PP_BLOCK_GFX_CG,
6045 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6046 			pp_state);
6047 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6048 
6049 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6050 			PP_BLOCK_GFX_MG,
6051 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6052 			pp_state);
6053 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6054 
6055 	return 0;
6056 }
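
/*
 * Note: on Tonga and Polaris, gfx clock gating is not programmed
 * through RLC registers here; each PP_CG_MSG_ID() encodes one GFX
 * sub-block plus the supported and requested CG/LS state, and
 * amd_set_clockgating_by_smu() forwards that message to the SMU via
 * the powerplay handle.
 */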
6057 
6058 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6059 					  enum amd_clockgating_state state)
6060 {
6061 	uint32_t msg_id, pp_state;
6062 	void *pp_handle = adev->powerplay.pp_handle;
6063 
6064 	if (state == AMD_CG_STATE_UNGATE)
6065 		pp_state = 0;
6066 	else
6067 		pp_state = PP_STATE_CG | PP_STATE_LS;
6068 
6069 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6070 			PP_BLOCK_GFX_CG,
6071 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6072 			pp_state);
6073 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6074 
6075 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6076 			PP_BLOCK_GFX_3D,
6077 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6078 			pp_state);
6079 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6080 
6081 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6082 			PP_BLOCK_GFX_MG,
6083 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6084 			pp_state);
6085 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6086 
6087 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6088 			PP_BLOCK_GFX_RLC,
6089 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6090 			pp_state);
6091 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6092 
6093 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6094 			PP_BLOCK_GFX_CP,
6095 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6096 			pp_state);
6097 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6098 
6099 	return 0;
6100 }
6101 
6102 static int gfx_v8_0_set_clockgating_state(void *handle,
6103 					  enum amd_clockgating_state state)
6104 {
6105 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6106 
6107 	switch (adev->asic_type) {
6108 	case CHIP_FIJI:
6109 	case CHIP_CARRIZO:
6110 	case CHIP_STONEY:
6111 		gfx_v8_0_update_gfx_clock_gating(adev,
6112 						 state == AMD_CG_STATE_GATE);
6113 		break;
6114 	case CHIP_TONGA:
6115 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6116 		break;
6117 	case CHIP_POLARIS10:
6118 	case CHIP_POLARIS11:
6119 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6120 		break;
6121 	default:
6122 		break;
6123 	}
6124 	return 0;
6125 }
6126 
6127 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6128 {
6129 	return ring->adev->wb.wb[ring->rptr_offs];
6130 }
6131 
6132 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6133 {
6134 	struct amdgpu_device *adev = ring->adev;
6135 
6136 	if (ring->use_doorbell)
6137 		/* XXX check if swapping is necessary on BE */
6138 		return ring->adev->wb.wb[ring->wptr_offs];
6139 	else
6140 		return RREG32(mmCP_RB0_WPTR);
6141 }
6142 
6143 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6144 {
6145 	struct amdgpu_device *adev = ring->adev;
6146 
6147 	if (ring->use_doorbell) {
6148 		/* XXX check if swapping is necessary on BE */
6149 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
6150 		WDOORBELL32(ring->doorbell_index, ring->wptr);
6151 	} else {
6152 		WREG32(mmCP_RB0_WPTR, ring->wptr);
6153 		(void)RREG32(mmCP_RB0_WPTR); /* read back to flush the posted write */
6154 	}
6155 }
6156 
6157 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6158 {
6159 	u32 ref_and_mask, reg_mem_engine;
6160 
6161 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
6162 		switch (ring->me) {
6163 		case 1:
6164 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6165 			break;
6166 		case 2:
6167 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6168 			break;
6169 		default:
6170 			return;
6171 		}
6172 		reg_mem_engine = 0;
6173 	} else {
6174 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6175 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6176 	}
6177 
6178 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6179 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6180 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6181 				 reg_mem_engine));
6182 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6183 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6184 	amdgpu_ring_write(ring, ref_and_mask);
6185 	amdgpu_ring_write(ring, ref_and_mask);
6186 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6187 }
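
/*
 * For reference, the dword layout of the WAIT_REG_MEM packet above
 * (count 5, so 7 dwords in total):
 *
 *   [0] PACKET3(PACKET3_WAIT_REG_MEM, 5)
 *   [1] operation=1 (write, wait, write), function=3 (==), engine
 *   [2] mmGPU_HDP_FLUSH_REQ  (register written)
 *   [3] mmGPU_HDP_FLUSH_DONE (register polled)
 *   [4] reference value
 *   [5] mask
 *   [6] poll interval (0x20)
 *
 * The CP writes ref_and_mask to the REQ register and then polls the
 * DONE register until (DONE & mask) == ref, i.e. until the HDP flush
 * for this client has completed.
 */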
6188 
6189 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6190 {
6191 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6192 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6193 				 WRITE_DATA_DST_SEL(0) |
6194 				 WR_CONFIRM));
6195 	amdgpu_ring_write(ring, mmHDP_DEBUG0);
6196 	amdgpu_ring_write(ring, 0);
6197 	amdgpu_ring_write(ring, 1);
6198 
6199 }
6200 
6201 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6202 				      struct amdgpu_ib *ib,
6203 				      unsigned vm_id, bool ctx_switch)
6204 {
6205 	u32 header, control = 0;
6206 
6207 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6208 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6209 	else
6210 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6211 
6212 	control |= ib->length_dw | (vm_id << 24);
6213 
6214 	amdgpu_ring_write(ring, header);
6215 	amdgpu_ring_write(ring,
6216 #ifdef __BIG_ENDIAN
6217 			  (2 << 0) |
6218 #endif
6219 			  (ib->gpu_addr & 0xFFFFFFFC));
6220 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6221 	amdgpu_ring_write(ring, control);
6222 }
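
/*
 * Note: the INDIRECT_BUFFER control word packs the IB length in dwords
 * into the low bits and the VM id into bits 31:24; e.g. a 16-dword IB
 * on vmid 3 gives control = 16 | (3 << 24) = 0x03000010.  CE IBs use
 * the INDIRECT_BUFFER_CONST opcode so that the constant engine, which
 * runs ahead of the DE, fetches them.
 */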
6223 
6224 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6225 					  struct amdgpu_ib *ib,
6226 					  unsigned vm_id, bool ctx_switch)
6227 {
6228 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6229 
6230 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6231 	amdgpu_ring_write(ring,
6232 #ifdef __BIG_ENDIAN
6233 				(2 << 0) |
6234 #endif
6235 				(ib->gpu_addr & 0xFFFFFFFC));
6236 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6237 	amdgpu_ring_write(ring, control);
6238 }
6239 
6240 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6241 					 u64 seq, unsigned flags)
6242 {
6243 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6244 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6245 
6246 	/* EVENT_WRITE_EOP - flush caches, send int */
6247 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6248 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6249 				 EOP_TC_ACTION_EN |
6250 				 EOP_TC_WB_ACTION_EN |
6251 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6252 				 EVENT_INDEX(5)));
6253 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6254 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6255 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6256 	amdgpu_ring_write(ring, lower_32_bits(seq));
6257 	amdgpu_ring_write(ring, upper_32_bits(seq));
6258 
6259 }
6260 
6261 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6262 {
6263 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6264 	uint32_t seq = ring->fence_drv.sync_seq;
6265 	uint64_t addr = ring->fence_drv.gpu_addr;
6266 
6267 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6268 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6269 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6270 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6271 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6272 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6273 	amdgpu_ring_write(ring, seq);
6274 	amdgpu_ring_write(ring, 0xffffffff);
6275 	amdgpu_ring_write(ring, 4); /* poll interval */
6276 }
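
/*
 * Note: the pipeline sync stalls the fetcher (PFP on gfx, ME on
 * compute) until the fence location at fence_drv.gpu_addr equals
 * sync_seq, i.e. until every previously emitted fence on this ring has
 * signalled.
 */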
6277 
6278 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6279 					unsigned vm_id, uint64_t pd_addr)
6280 {
6281 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6282 
6283 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6284 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6285 				 WRITE_DATA_DST_SEL(0) |
6286 				 WR_CONFIRM));
6287 	if (vm_id < 8) {
6288 		amdgpu_ring_write(ring,
6289 				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6290 	} else {
6291 		amdgpu_ring_write(ring,
6292 				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6293 	}
6294 	amdgpu_ring_write(ring, 0);
6295 	amdgpu_ring_write(ring, pd_addr >> 12);
6296 
6297 	/* bits 0-15 are the VM contexts0-15 */
6298 	/* invalidate the cache */
6299 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6300 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6301 				 WRITE_DATA_DST_SEL(0)));
6302 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6303 	amdgpu_ring_write(ring, 0);
6304 	amdgpu_ring_write(ring, 1 << vm_id);
6305 
6306 	/* wait for the invalidate to complete */
6307 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6308 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6309 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6310 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6311 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6312 	amdgpu_ring_write(ring, 0);
6313 	amdgpu_ring_write(ring, 0); /* ref */
6314 	amdgpu_ring_write(ring, 0); /* mask */
6315 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6316 
6317 	/* compute doesn't have PFP */
6318 	if (usepfp) {
6319 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6320 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6321 		amdgpu_ring_write(ring, 0x0);
6322 		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
6323 		amdgpu_ring_insert_nop(ring, 128);
6324 	}
6325 }
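
/*
 * Note: VM_CONTEXTn_PAGE_TABLE_BASE_ADDR takes the page directory
 * address in 4 KiB units, hence pd_addr >> 12 above; VM contexts 0-7
 * and 8-15 sit in two separate register banks, hence the vm_id < 8
 * split.
 */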
6326 
6327 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6328 {
6329 	return ring->adev->wb.wb[ring->wptr_offs];
6330 }
6331 
6332 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6333 {
6334 	struct amdgpu_device *adev = ring->adev;
6335 
6336 	/* XXX check if swapping is necessary on BE */
6337 	adev->wb.wb[ring->wptr_offs] = ring->wptr;
6338 	WDOORBELL32(ring->doorbell_index, ring->wptr);
6339 }
6340 
6341 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6342 					     u64 addr, u64 seq,
6343 					     unsigned flags)
6344 {
6345 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6346 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6347 
6348 	/* RELEASE_MEM - flush caches, send int */
6349 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6350 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6351 				 EOP_TC_ACTION_EN |
6352 				 EOP_TC_WB_ACTION_EN |
6353 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6354 				 EVENT_INDEX(5)));
6355 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6356 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6357 	amdgpu_ring_write(ring, upper_32_bits(addr));
6358 	amdgpu_ring_write(ring, lower_32_bits(seq));
6359 	amdgpu_ring_write(ring, upper_32_bits(seq));
6360 }
6361 
6362 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6363 {
6364 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6365 	amdgpu_ring_write(ring, 0);
6366 }
6367 
6368 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6369 {
6370 	uint32_t dw2 = 0;
6371 
6372 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6373 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6374 		/* set load_global_config & load_global_uconfig */
6375 		dw2 |= 0x8001;
6376 		/* set load_cs_sh_regs */
6377 		dw2 |= 0x01000000;
6378 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6379 		dw2 |= 0x10002;
6380 
6381 		/* set load_ce_ram if a preamble is present */
6382 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6383 			dw2 |= 0x10000000;
6384 	} else {
6385 		/* still load_ce_ram if this is the first time a preamble is
6386 		 * presented, even though no context switch happens.
6387 		 */
6388 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6389 			dw2 |= 0x10000000;
6390 	}
6391 
6392 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6393 	amdgpu_ring_write(ring, dw2);
6394 	amdgpu_ring_write(ring, 0);
6395 }
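
/*
 * Worked example of dw2 above for a context switch with a preamble
 * present:
 *
 *     0x80000000  load_enable
 *   | 0x00008001  load_global_config, load_global_uconfig
 *   | 0x01000000  load_cs_sh_regs
 *   | 0x00010002  load_per_context_state, load_gfx_sh_regs
 *   | 0x10000000  load_ce_ram
 *   = 0x91018003
 */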
6396 
6397 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6398 						 enum amdgpu_interrupt_state state)
6399 {
6400 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6401 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6402 }
6403 
6404 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6405 						     int me, int pipe,
6406 						     enum amdgpu_interrupt_state state)
6407 {
6408 	/*
6409 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6410 	 * handles the setting of interrupts for this specific pipe. All other
6411 	 * pipes' interrupts are set by amdkfd.
6412 	 */
6413 
6414 	if (me == 1) {
6415 		switch (pipe) {
6416 		case 0:
6417 			break;
6418 		default:
6419 			DRM_DEBUG("invalid pipe %d\n", pipe);
6420 			return;
6421 		}
6422 	} else {
6423 		DRM_DEBUG("invalid me %d\n", me);
6424 		return;
6425 	}
6426 
6427 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6428 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6429 }
6430 
6431 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6432 					     struct amdgpu_irq_src *source,
6433 					     unsigned type,
6434 					     enum amdgpu_interrupt_state state)
6435 {
6436 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6437 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6438 
6439 	return 0;
6440 }
6441 
6442 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6443 					      struct amdgpu_irq_src *source,
6444 					      unsigned type,
6445 					      enum amdgpu_interrupt_state state)
6446 {
6447 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6448 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6449 
6450 	return 0;
6451 }
6452 
6453 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6454 					    struct amdgpu_irq_src *src,
6455 					    unsigned type,
6456 					    enum amdgpu_interrupt_state state)
6457 {
6458 	switch (type) {
6459 	case AMDGPU_CP_IRQ_GFX_EOP:
6460 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6461 		break;
6462 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6463 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6464 		break;
6465 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6466 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6467 		break;
6468 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6469 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6470 		break;
6471 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6472 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6473 		break;
6474 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6475 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6476 		break;
6477 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6478 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6479 		break;
6480 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6481 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6482 		break;
6483 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6484 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6485 		break;
6486 	default:
6487 		break;
6488 	}
6489 	return 0;
6490 }
6491 
6492 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6493 			    struct amdgpu_irq_src *source,
6494 			    struct amdgpu_iv_entry *entry)
6495 {
6496 	int i;
6497 	u8 me_id, pipe_id, queue_id;
6498 	struct amdgpu_ring *ring;
6499 
6500 	DRM_DEBUG("IH: CP EOP\n");
6501 	me_id = (entry->ring_id & 0x0c) >> 2;
6502 	pipe_id = (entry->ring_id & 0x03) >> 0;
6503 	queue_id = (entry->ring_id & 0x70) >> 4;
6504 
6505 	switch (me_id) {
6506 	case 0:
6507 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6508 		break;
6509 	case 1:
6510 	case 2:
6511 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6512 			ring = &adev->gfx.compute_ring[i];
6513 			/* Per-queue interrupt is supported for MEC starting from VI.
6514 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6515 			 */
6516 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6517 				amdgpu_fence_process(ring);
6518 		}
6519 		break;
6520 	}
6521 	return 0;
6522 }
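
/*
 * Worked example of the ring_id decode above: an IV entry with
 * ring_id = 0x26 (0b0100110) gives
 *
 *   me_id    = (0x26 & 0x0c) >> 2 = 1
 *   pipe_id  =  0x26 & 0x03       = 2
 *   queue_id = (0x26 & 0x70) >> 4 = 2
 *
 * i.e. MEC1, pipe 2, queue 2, so only the matching compute ring has
 * its fences processed.
 */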
6523 
6524 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6525 				 struct amdgpu_irq_src *source,
6526 				 struct amdgpu_iv_entry *entry)
6527 {
6528 	DRM_ERROR("Illegal register access in command stream\n");
6529 	schedule_work(&adev->reset_work);
6530 	return 0;
6531 }
6532 
6533 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6534 				  struct amdgpu_irq_src *source,
6535 				  struct amdgpu_iv_entry *entry)
6536 {
6537 	DRM_ERROR("Illegal instruction in command stream\n");
6538 	schedule_work(&adev->reset_work);
6539 	return 0;
6540 }
6541 
6542 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6543 	.name = "gfx_v8_0",
6544 	.early_init = gfx_v8_0_early_init,
6545 	.late_init = gfx_v8_0_late_init,
6546 	.sw_init = gfx_v8_0_sw_init,
6547 	.sw_fini = gfx_v8_0_sw_fini,
6548 	.hw_init = gfx_v8_0_hw_init,
6549 	.hw_fini = gfx_v8_0_hw_fini,
6550 	.suspend = gfx_v8_0_suspend,
6551 	.resume = gfx_v8_0_resume,
6552 	.is_idle = gfx_v8_0_is_idle,
6553 	.wait_for_idle = gfx_v8_0_wait_for_idle,
6554 	.check_soft_reset = gfx_v8_0_check_soft_reset,
6555 	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6556 	.soft_reset = gfx_v8_0_soft_reset,
6557 	.post_soft_reset = gfx_v8_0_post_soft_reset,
6558 	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6559 	.set_powergating_state = gfx_v8_0_set_powergating_state,
6560 };
6561 
6562 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6563 	.type = AMDGPU_RING_TYPE_GFX,
6564 	.align_mask = 0xff,
6565 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6566 	.get_rptr = gfx_v8_0_ring_get_rptr,
6567 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6568 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6569 	.emit_frame_size =
6570 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6571 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6572 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6573 		6 + 6 + 6 + /* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6574 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6575 		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6576 		2 + /* gfx_v8_ring_emit_sb */
6577 		3, /* gfx_v8_ring_emit_cntxcntl */
6578 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6579 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6580 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6581 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6582 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6583 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6584 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6585 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6586 	.test_ring = gfx_v8_0_ring_test_ring,
6587 	.test_ib = gfx_v8_0_ring_test_ib,
6588 	.insert_nop = amdgpu_ring_insert_nop,
6589 	.pad_ib = amdgpu_ring_generic_pad_ib,
6590 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6591 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6592 };
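
/*
 * Note: emit_frame_size is the worst-case number of ring dwords of
 * per-submission overhead (gds switch, flushes, syncs, fences, ...)
 * outside the IBs themselves; the ring is sized from it so the emit_*
 * callbacks above can never overflow mid-submission.  Each addend is
 * annotated with the emit function it accounts for.
 */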
6593 
6594 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6595 	.type = AMDGPU_RING_TYPE_COMPUTE,
6596 	.align_mask = 0xff,
6597 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6598 	.get_rptr = gfx_v8_0_ring_get_rptr,
6599 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6600 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6601 	.emit_frame_size =
6602 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6603 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6604 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6605 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6606 		17 + /* gfx_v8_0_ring_emit_vm_flush */
6607 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6608 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
6609 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6610 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6611 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6612 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6613 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6614 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6615 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6616 	.test_ring = gfx_v8_0_ring_test_ring,
6617 	.test_ib = gfx_v8_0_ring_test_ib,
6618 	.insert_nop = amdgpu_ring_insert_nop,
6619 	.pad_ib = amdgpu_ring_generic_pad_ib,
6620 };
6621 
6622 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6623 {
6624 	int i;
6625 
6626 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6627 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6628 
6629 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6630 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6631 }
6632 
6633 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6634 	.set = gfx_v8_0_set_eop_interrupt_state,
6635 	.process = gfx_v8_0_eop_irq,
6636 };
6637 
6638 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6639 	.set = gfx_v8_0_set_priv_reg_fault_state,
6640 	.process = gfx_v8_0_priv_reg_irq,
6641 };
6642 
6643 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6644 	.set = gfx_v8_0_set_priv_inst_fault_state,
6645 	.process = gfx_v8_0_priv_inst_irq,
6646 };
6647 
6648 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6649 {
6650 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6651 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6652 
6653 	adev->gfx.priv_reg_irq.num_types = 1;
6654 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6655 
6656 	adev->gfx.priv_inst_irq.num_types = 1;
6657 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6658 }
6659 
6660 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6661 {
6662 	switch (adev->asic_type) {
6663 	case CHIP_TOPAZ:
6664 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6665 		break;
6666 	case CHIP_STONEY:
6667 	case CHIP_CARRIZO:
6668 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6669 		break;
6670 	default:
6671 		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6672 		break;
6673 	}
6674 }
6675 
6676 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6677 {
6678 	/* init asic gds info */
6679 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6680 	adev->gds.gws.total_size = 64;
6681 	adev->gds.oa.total_size = 16;
6682 
6683 	if (adev->gds.mem.total_size == 64 * 1024) {
6684 		adev->gds.mem.gfx_partition_size = 4096;
6685 		adev->gds.mem.cs_partition_size = 4096;
6686 
6687 		adev->gds.gws.gfx_partition_size = 4;
6688 		adev->gds.gws.cs_partition_size = 4;
6689 
6690 		adev->gds.oa.gfx_partition_size = 4;
6691 		adev->gds.oa.cs_partition_size = 1;
6692 	} else {
6693 		adev->gds.mem.gfx_partition_size = 1024;
6694 		adev->gds.mem.cs_partition_size = 1024;
6695 
6696 		adev->gds.gws.gfx_partition_size = 16;
6697 		adev->gds.gws.cs_partition_size = 16;
6698 
6699 		adev->gds.oa.gfx_partition_size = 4;
6700 		adev->gds.oa.cs_partition_size = 4;
6701 	}
6702 }
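
/*
 * Note: with a 64 KiB GDS the partition sizes work out to one 4 KiB
 * slice per client (16 * 4 KiB = 64 KiB, presumably one slice per
 * VMID); other GDS sizes fall back to 1 KiB memory partitions with
 * correspondingly larger GWS shares.
 */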
6703 
6704 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6705 						 u32 bitmap)
6706 {
6707 	u32 data;
6708 
6709 	if (!bitmap)
6710 		return;
6711 
6712 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6713 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6714 
6715 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6716 }
6717 
6718 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6719 {
6720 	u32 data, mask;
6721 
6722 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6723 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6724 
6725 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6726 
6727 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6728 }
6729 
6730 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6731 {
6732 	int i, j, k, counter, active_cu_number = 0;
6733 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6734 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6735 	unsigned disable_masks[4 * 2];
6736 
6737 	memset(cu_info, 0, sizeof(*cu_info));
6738 
6739 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6740 
6741 	mutex_lock(&adev->grbm_idx_mutex);
6742 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6743 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6744 			mask = 1;
6745 			ao_bitmap = 0;
6746 			counter = 0;
6747 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6748 			if (i < 4 && j < 2)
6749 				gfx_v8_0_set_user_cu_inactive_bitmap(
6750 					adev, disable_masks[i * 2 + j]);
6751 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6752 			cu_info->bitmap[i][j] = bitmap;
6753 
6754 			for (k = 0; k < 16; k++) {
6755 				if (bitmap & mask) {
6756 					if (counter < 2)
6757 						ao_bitmap |= mask;
6758 					counter++;
6759 				}
6760 				mask <<= 1;
6761 			}
6762 			active_cu_number += counter;
6763 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6764 		}
6765 	}
6766 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6767 	mutex_unlock(&adev->grbm_idx_mutex);
6768 
6769 	cu_info->number = active_cu_number;
6770 	cu_info->ao_cu_mask = ao_cu_mask;
6771 }
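
/*
 * Note: the inner loop marks the first two active CUs of each SH as
 * "always on".  E.g. bitmap 0b1011 (CUs 0, 1 and 3 active) yields
 * counter = 3 and ao_bitmap = 0b0011, so active_cu_number grows by 3
 * while only CUs 0 and 1 enter ao_cu_mask.
 */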
6772 
6773 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
6774 {
6775 	.type = AMD_IP_BLOCK_TYPE_GFX,
6776 	.major = 8,
6777 	.minor = 0,
6778 	.rev = 0,
6779 	.funcs = &gfx_v8_0_ip_funcs,
6780 };
6781 
6782 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
6783 {
6784 	.type = AMD_IP_BLOCK_TYPE_GFX,
6785 	.major = 8,
6786 	.minor = 1,
6787 	.rev = 0,
6788 	.funcs = &gfx_v8_0_ip_funcs,
6789 };
6790