xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 82003e04)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33 
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36 
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39 
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42 
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47 
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50 
51 #include "smu/smu_7_1_3_d.h"
52 
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55 
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60 
61 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70 
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77 
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81 
82 /* BPM Register Address*/
83 enum {
84 	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
85 	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
86 	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
87 	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
88 	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
89 	BPM_REG_FGCG_MAX
90 };
91 
92 #define RLC_FormatDirectRegListLength        14
93 
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100 
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106 
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113 
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119 
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126 
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133 
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140 
141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
142 {
143 	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
144 	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
145 	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
146 	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
147 	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
148 	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
149 	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
150 	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
151 	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
152 	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
153 	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
154 	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
155 	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
156 	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
157 	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
158 	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
159 };
160 
161 static const u32 golden_settings_tonga_a11[] =
162 {
163 	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
164 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
165 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
166 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
167 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
168 	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
169 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
170 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
171 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
172 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
173 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
174 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
175 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
176 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
177 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
178 };
179 
180 static const u32 tonga_golden_common_all[] =
181 {
182 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
183 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
184 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
185 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
186 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
187 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
188 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
189 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
190 };
191 
192 static const u32 tonga_mgcg_cgcg_init[] =
193 {
194 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
195 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
196 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
197 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
198 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
199 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
200 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
201 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
202 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
203 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
204 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
205 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
206 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
207 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
208 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
209 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
210 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
211 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
212 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
213 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
214 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
215 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
216 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
217 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
218 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
219 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
220 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
221 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
223 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
224 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
225 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
228 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
233 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
238 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
243 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
248 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
253 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255 	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256 	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257 	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
258 	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259 	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260 	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261 	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262 	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
263 	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264 	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
266 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
267 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
268 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
269 };
270 
271 static const u32 golden_settings_polaris11_a11[] =
272 {
273 	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
274 	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
275 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
276 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
277 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
278 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
279 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
280 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
281 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
282 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
283 	mmSQ_CONFIG, 0x07f80000, 0x01180000,
284 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
285 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
286 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
287 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
288 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
289 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
290 };
291 
292 static const u32 polaris11_golden_common_all[] =
293 {
294 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
296 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
297 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
298 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
299 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
300 };
301 
302 static const u32 golden_settings_polaris10_a11[] =
303 {
304 	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
305 	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
306 	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
307 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
308 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
309 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
310 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
311 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
312 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
313 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
314 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
315 	mmSQ_CONFIG, 0x07f80000, 0x07180000,
316 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
317 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
318 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
319 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
320 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
321 };
322 
323 static const u32 polaris10_golden_common_all[] =
324 {
325 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
327 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
328 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
332 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
333 };
334 
335 static const u32 fiji_golden_common_all[] =
336 {
337 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
338 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
339 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
340 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
341 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
342 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
343 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
344 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
345 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
346 	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
347 };
348 
349 static const u32 golden_settings_fiji_a10[] =
350 {
351 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
352 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
353 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
354 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
355 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
356 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
357 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
358 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
359 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
360 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
361 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
362 };
363 
364 static const u32 fiji_mgcg_cgcg_init[] =
365 {
366 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
367 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
368 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
369 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
370 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
371 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
372 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
373 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
374 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
375 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
376 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
377 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
378 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
379 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
380 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
381 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
382 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
383 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
384 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
385 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
386 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
387 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
388 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
389 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
390 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
391 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
392 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
393 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
394 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
395 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
396 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
397 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
398 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
399 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
400 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
401 };
402 
403 static const u32 golden_settings_iceland_a11[] =
404 {
405 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
406 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
407 	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
408 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
409 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
410 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
411 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
412 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
413 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
414 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
415 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
416 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
417 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
418 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
419 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
420 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
421 };
422 
423 static const u32 iceland_golden_common_all[] =
424 {
425 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
426 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
427 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
428 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
429 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
430 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
431 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
432 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
433 };
434 
435 static const u32 iceland_mgcg_cgcg_init[] =
436 {
437 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
438 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
439 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
440 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
441 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
442 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
443 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
444 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
445 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
446 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
447 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
448 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
449 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
450 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
451 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
452 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
453 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
454 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
455 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
456 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
457 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
458 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
459 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
460 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
461 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
462 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
463 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
464 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
465 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
466 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
467 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
468 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
469 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
470 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
471 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
472 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
473 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
474 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
475 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
476 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
477 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
478 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
479 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
480 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
481 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
482 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
483 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
484 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
485 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
486 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
487 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
488 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
489 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
490 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
491 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
492 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
493 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
494 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
495 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
496 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
497 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
498 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
499 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
500 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
501 };
502 
503 static const u32 cz_golden_settings_a11[] =
504 {
505 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
506 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
507 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
508 	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
509 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
510 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
511 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
512 	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
513 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
516 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
517 };
518 
519 static const u32 cz_golden_common_all[] =
520 {
521 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
523 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
525 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
529 };
530 
531 static const u32 cz_mgcg_cgcg_init[] =
532 {
533 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
534 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
536 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
537 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
538 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
539 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
540 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
541 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
542 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
543 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
544 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
545 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
546 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
547 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
548 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
549 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
550 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
551 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
552 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
553 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
554 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
555 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
556 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
557 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
558 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
559 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
560 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
561 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
562 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
563 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
564 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
565 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
566 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
567 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
568 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
569 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
570 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
571 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
572 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
573 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
574 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
575 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
576 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
577 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
578 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
579 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
580 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
581 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
582 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
583 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
584 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
585 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
586 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
587 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
588 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
589 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
590 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
591 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
592 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
593 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
594 	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
595 	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
596 	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
597 	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
598 	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
599 	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
600 	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
601 	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
602 	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
603 	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
604 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
605 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
606 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
607 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
608 };
609 
610 static const u32 stoney_golden_settings_a11[] =
611 {
612 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
613 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
614 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
615 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
616 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
617 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
618 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
619 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
620 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
621 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
622 };
623 
624 static const u32 stoney_golden_common_all[] =
625 {
626 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
627 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
628 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
629 	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
630 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
631 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
632 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
633 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
634 };
635 
636 static const u32 stoney_mgcg_cgcg_init[] =
637 {
638 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
639 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
640 	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
641 	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
642 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
643 };
644 
645 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
646 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
647 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
648 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
649 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
650 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
651 
652 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
653 {
654 	switch (adev->asic_type) {
655 	case CHIP_TOPAZ:
656 		amdgpu_program_register_sequence(adev,
657 						 iceland_mgcg_cgcg_init,
658 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
659 		amdgpu_program_register_sequence(adev,
660 						 golden_settings_iceland_a11,
661 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
662 		amdgpu_program_register_sequence(adev,
663 						 iceland_golden_common_all,
664 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
665 		break;
666 	case CHIP_FIJI:
667 		amdgpu_program_register_sequence(adev,
668 						 fiji_mgcg_cgcg_init,
669 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
670 		amdgpu_program_register_sequence(adev,
671 						 golden_settings_fiji_a10,
672 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
673 		amdgpu_program_register_sequence(adev,
674 						 fiji_golden_common_all,
675 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
676 		break;
677 
678 	case CHIP_TONGA:
679 		amdgpu_program_register_sequence(adev,
680 						 tonga_mgcg_cgcg_init,
681 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
682 		amdgpu_program_register_sequence(adev,
683 						 golden_settings_tonga_a11,
684 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
685 		amdgpu_program_register_sequence(adev,
686 						 tonga_golden_common_all,
687 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
688 		break;
689 	case CHIP_POLARIS11:
690 		amdgpu_program_register_sequence(adev,
691 						 golden_settings_polaris11_a11,
692 						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
693 		amdgpu_program_register_sequence(adev,
694 						 polaris11_golden_common_all,
695 						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
696 		break;
697 	case CHIP_POLARIS10:
698 		amdgpu_program_register_sequence(adev,
699 						 golden_settings_polaris10_a11,
700 						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
701 		amdgpu_program_register_sequence(adev,
702 						 polaris10_golden_common_all,
703 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
704 		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
705 		if (adev->pdev->revision == 0xc7 &&
706 		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
707 		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
708 		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
709 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
710 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
711 		}
712 		break;
713 	case CHIP_CARRIZO:
714 		amdgpu_program_register_sequence(adev,
715 						 cz_mgcg_cgcg_init,
716 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
717 		amdgpu_program_register_sequence(adev,
718 						 cz_golden_settings_a11,
719 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
720 		amdgpu_program_register_sequence(adev,
721 						 cz_golden_common_all,
722 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
723 		break;
724 	case CHIP_STONEY:
725 		amdgpu_program_register_sequence(adev,
726 						 stoney_mgcg_cgcg_init,
727 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
728 		amdgpu_program_register_sequence(adev,
729 						 stoney_golden_settings_a11,
730 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
731 		amdgpu_program_register_sequence(adev,
732 						 stoney_golden_common_all,
733 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
734 		break;
735 	default:
736 		break;
737 	}
738 }
739 
740 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
741 {
742 	int i;
743 
744 	adev->gfx.scratch.num_reg = 7;
745 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
746 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
747 		adev->gfx.scratch.free[i] = true;
748 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
749 	}
750 }
751 
/**
 * gfx_v8_0_ring_test_ring - basic sanity test for a gfx/compute ring
 * @ring: amdgpu_ring structure holding ring information
 *
 * Grabs a scratch register, seeds it with 0xCAFEDEAD, submits a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF into that register,
 * and polls until the value appears or the usec timeout expires.
 * Returns 0 on success, a negative error code on failure.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the register so we can tell when the CP has overwritten it */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* 3 dwords: packet header, register offset, value */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP to process the packet */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
795 
/**
 * gfx_v8_0_ring_test_ib - indirect buffer test on a gfx/compute ring
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: how long to wait for the IB's fence (fence_wait_timeout units)
 *
 * Builds a 3-dword IB that writes 0xDEADBEEF into a scratch register,
 * schedules it, waits on the resulting fence, then checks that the
 * value landed.  Returns 0 on success, -ETIMEDOUT if the fence never
 * signalled, or another negative error code on failure.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* seed the register so the IB's write is observable */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* same SET_UCONFIG_REG write as the ring test, but via an IB */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
851 
852 
853 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
854 	release_firmware(adev->gfx.pfp_fw);
855 	adev->gfx.pfp_fw = NULL;
856 	release_firmware(adev->gfx.me_fw);
857 	adev->gfx.me_fw = NULL;
858 	release_firmware(adev->gfx.ce_fw);
859 	adev->gfx.ce_fw = NULL;
860 	release_firmware(adev->gfx.rlc_fw);
861 	adev->gfx.rlc_fw = NULL;
862 	release_firmware(adev->gfx.mec_fw);
863 	adev->gfx.mec_fw = NULL;
864 	if ((adev->asic_type != CHIP_STONEY) &&
865 	    (adev->asic_type != CHIP_TOPAZ))
866 		release_firmware(adev->gfx.mec2_fw);
867 	adev->gfx.mec2_fw = NULL;
868 
869 	kfree(adev->gfx.rlc.register_list_format);
870 }
871 
872 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
873 {
874 	const char *chip_name;
875 	char fw_name[30];
876 	int err;
877 	struct amdgpu_firmware_info *info = NULL;
878 	const struct common_firmware_header *header = NULL;
879 	const struct gfx_firmware_header_v1_0 *cp_hdr;
880 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
881 	unsigned int *tmp = NULL, i;
882 
883 	DRM_DEBUG("\n");
884 
885 	switch (adev->asic_type) {
886 	case CHIP_TOPAZ:
887 		chip_name = "topaz";
888 		break;
889 	case CHIP_TONGA:
890 		chip_name = "tonga";
891 		break;
892 	case CHIP_CARRIZO:
893 		chip_name = "carrizo";
894 		break;
895 	case CHIP_FIJI:
896 		chip_name = "fiji";
897 		break;
898 	case CHIP_POLARIS11:
899 		chip_name = "polaris11";
900 		break;
901 	case CHIP_POLARIS10:
902 		chip_name = "polaris10";
903 		break;
904 	case CHIP_STONEY:
905 		chip_name = "stoney";
906 		break;
907 	default:
908 		BUG();
909 	}
910 
911 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
912 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
913 	if (err)
914 		goto out;
915 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
916 	if (err)
917 		goto out;
918 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
919 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
920 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
921 
922 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
923 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
924 	if (err)
925 		goto out;
926 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
927 	if (err)
928 		goto out;
929 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
930 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
931 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
932 
933 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
934 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
935 	if (err)
936 		goto out;
937 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
938 	if (err)
939 		goto out;
940 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
941 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
942 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
943 
944 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
945 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
946 	if (err)
947 		goto out;
948 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
949 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
950 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
951 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
952 
953 	adev->gfx.rlc.save_and_restore_offset =
954 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
955 	adev->gfx.rlc.clear_state_descriptor_offset =
956 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
957 	adev->gfx.rlc.avail_scratch_ram_locations =
958 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
959 	adev->gfx.rlc.reg_restore_list_size =
960 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
961 	adev->gfx.rlc.reg_list_format_start =
962 			le32_to_cpu(rlc_hdr->reg_list_format_start);
963 	adev->gfx.rlc.reg_list_format_separate_start =
964 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
965 	adev->gfx.rlc.starting_offsets_start =
966 			le32_to_cpu(rlc_hdr->starting_offsets_start);
967 	adev->gfx.rlc.reg_list_format_size_bytes =
968 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
969 	adev->gfx.rlc.reg_list_size_bytes =
970 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
971 
972 	adev->gfx.rlc.register_list_format =
973 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
974 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
975 
976 	if (!adev->gfx.rlc.register_list_format) {
977 		err = -ENOMEM;
978 		goto out;
979 	}
980 
981 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
982 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
983 	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
984 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
985 
986 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
987 
988 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
989 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
990 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
991 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
992 
993 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
994 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
995 	if (err)
996 		goto out;
997 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
998 	if (err)
999 		goto out;
1000 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1001 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1002 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1003 
1004 	if ((adev->asic_type != CHIP_STONEY) &&
1005 	    (adev->asic_type != CHIP_TOPAZ)) {
1006 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1007 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1008 		if (!err) {
1009 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1010 			if (err)
1011 				goto out;
1012 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1013 				adev->gfx.mec2_fw->data;
1014 			adev->gfx.mec2_fw_version =
1015 				le32_to_cpu(cp_hdr->header.ucode_version);
1016 			adev->gfx.mec2_feature_version =
1017 				le32_to_cpu(cp_hdr->ucode_feature_version);
1018 		} else {
1019 			err = 0;
1020 			adev->gfx.mec2_fw = NULL;
1021 		}
1022 	}
1023 
1024 	if (adev->firmware.smu_load) {
1025 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1026 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1027 		info->fw = adev->gfx.pfp_fw;
1028 		header = (const struct common_firmware_header *)info->fw->data;
1029 		adev->firmware.fw_size +=
1030 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1031 
1032 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1033 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1034 		info->fw = adev->gfx.me_fw;
1035 		header = (const struct common_firmware_header *)info->fw->data;
1036 		adev->firmware.fw_size +=
1037 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1038 
1039 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1040 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1041 		info->fw = adev->gfx.ce_fw;
1042 		header = (const struct common_firmware_header *)info->fw->data;
1043 		adev->firmware.fw_size +=
1044 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1045 
1046 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1047 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1048 		info->fw = adev->gfx.rlc_fw;
1049 		header = (const struct common_firmware_header *)info->fw->data;
1050 		adev->firmware.fw_size +=
1051 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1052 
1053 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1054 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1055 		info->fw = adev->gfx.mec_fw;
1056 		header = (const struct common_firmware_header *)info->fw->data;
1057 		adev->firmware.fw_size +=
1058 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1059 
1060 		if (adev->gfx.mec2_fw) {
1061 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1062 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1063 			info->fw = adev->gfx.mec2_fw;
1064 			header = (const struct common_firmware_header *)info->fw->data;
1065 			adev->firmware.fw_size +=
1066 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1067 		}
1068 
1069 	}
1070 
1071 out:
1072 	if (err) {
1073 		dev_err(adev->dev,
1074 			"gfx8: Failed to load firmware \"%s\"\n",
1075 			fw_name);
1076 		release_firmware(adev->gfx.pfp_fw);
1077 		adev->gfx.pfp_fw = NULL;
1078 		release_firmware(adev->gfx.me_fw);
1079 		adev->gfx.me_fw = NULL;
1080 		release_firmware(adev->gfx.ce_fw);
1081 		adev->gfx.ce_fw = NULL;
1082 		release_firmware(adev->gfx.rlc_fw);
1083 		adev->gfx.rlc_fw = NULL;
1084 		release_firmware(adev->gfx.mec_fw);
1085 		adev->gfx.mec_fw = NULL;
1086 		release_firmware(adev->gfx.mec2_fw);
1087 		adev->gfx.mec2_fw = NULL;
1088 	}
1089 	return err;
1090 }
1091 
/**
 * gfx_v8_0_get_csb_buffer - build the clear state indirect buffer
 * @adev: amdgpu device pointer
 * @buffer: mapped destination buffer (little-endian dwords)
 *
 * Emits the PM4 packet stream that programs the clear state: preamble
 * begin, context control, all SECT_CONTEXT register extents from
 * adev->gfx.rlc.cs_data, a per-ASIC PA_SC_RASTER_CONFIG pair, preamble
 * end, and a final CLEAR_STATE packet.  The buffer must be at least
 * gfx_v8_0_get_csb_size(adev) dwords.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit one SET_CONTEXT_REG packet per register extent; only
	 * SECT_CONTEXT sections are expected here - anything else aborts
	 */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values are per-ASIC */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1164 
1165 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1166 {
1167 	const __le32 *fw_data;
1168 	volatile u32 *dst_ptr;
1169 	int me, i, max_me = 4;
1170 	u32 bo_offset = 0;
1171 	u32 table_offset, table_size;
1172 
1173 	if (adev->asic_type == CHIP_CARRIZO)
1174 		max_me = 5;
1175 
1176 	/* write the cp table buffer */
1177 	dst_ptr = adev->gfx.rlc.cp_table_ptr;
1178 	for (me = 0; me < max_me; me++) {
1179 		if (me == 0) {
1180 			const struct gfx_firmware_header_v1_0 *hdr =
1181 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1182 			fw_data = (const __le32 *)
1183 				(adev->gfx.ce_fw->data +
1184 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1185 			table_offset = le32_to_cpu(hdr->jt_offset);
1186 			table_size = le32_to_cpu(hdr->jt_size);
1187 		} else if (me == 1) {
1188 			const struct gfx_firmware_header_v1_0 *hdr =
1189 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1190 			fw_data = (const __le32 *)
1191 				(adev->gfx.pfp_fw->data +
1192 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1193 			table_offset = le32_to_cpu(hdr->jt_offset);
1194 			table_size = le32_to_cpu(hdr->jt_size);
1195 		} else if (me == 2) {
1196 			const struct gfx_firmware_header_v1_0 *hdr =
1197 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1198 			fw_data = (const __le32 *)
1199 				(adev->gfx.me_fw->data +
1200 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1201 			table_offset = le32_to_cpu(hdr->jt_offset);
1202 			table_size = le32_to_cpu(hdr->jt_size);
1203 		} else if (me == 3) {
1204 			const struct gfx_firmware_header_v1_0 *hdr =
1205 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1206 			fw_data = (const __le32 *)
1207 				(adev->gfx.mec_fw->data +
1208 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1209 			table_offset = le32_to_cpu(hdr->jt_offset);
1210 			table_size = le32_to_cpu(hdr->jt_size);
1211 		} else  if (me == 4) {
1212 			const struct gfx_firmware_header_v1_0 *hdr =
1213 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1214 			fw_data = (const __le32 *)
1215 				(adev->gfx.mec2_fw->data +
1216 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1217 			table_offset = le32_to_cpu(hdr->jt_offset);
1218 			table_size = le32_to_cpu(hdr->jt_size);
1219 		}
1220 
1221 		for (i = 0; i < table_size; i ++) {
1222 			dst_ptr[bo_offset + i] =
1223 				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1224 		}
1225 
1226 		bo_offset += table_size;
1227 	}
1228 }
1229 
1230 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1231 {
1232 	int r;
1233 
1234 	/* clear state block */
1235 	if (adev->gfx.rlc.clear_state_obj) {
1236 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1237 		if (unlikely(r != 0))
1238 			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1239 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1240 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1241 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1242 		adev->gfx.rlc.clear_state_obj = NULL;
1243 	}
1244 
1245 	/* jump table block */
1246 	if (adev->gfx.rlc.cp_table_obj) {
1247 		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1248 		if (unlikely(r != 0))
1249 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1250 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1251 		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1252 		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1253 		adev->gfx.rlc.cp_table_obj = NULL;
1254 	}
1255 }
1256 
/**
 * gfx_v8_0_rlc_init - allocate and fill the RLC buffer objects
 * @adev: amdgpu device pointer
 *
 * Creates, pins and maps the clear-state BO in VRAM and writes the CSB
 * packet stream into it; on Carrizo/Stoney it additionally creates the
 * CP jump-table BO and fills it via cz_init_cp_jump_table().  Returns
 * 0 on success or a negative error code (partially created state is
 * cleaned up via gfx_v8_0_rlc_fini() on the clear-state error paths).
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* pin in VRAM so the RLC can always reach it */
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	/* only Carrizo/Stoney use the RLC-driven CP jump table */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
1352 
1353 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1354 {
1355 	int r;
1356 
1357 	if (adev->gfx.mec.hpd_eop_obj) {
1358 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1359 		if (unlikely(r != 0))
1360 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1361 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1362 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1363 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1364 		adev->gfx.mec.hpd_eop_obj = NULL;
1365 	}
1366 }
1367 
1368 #define MEC_HPD_SIZE 2048
1369 
1370 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1371 {
1372 	int r;
1373 	u32 *hpd;
1374 
1375 	/*
1376 	 * we assign only 1 pipe because all other pipes will
1377 	 * be handled by KFD
1378 	 */
1379 	adev->gfx.mec.num_mec = 1;
1380 	adev->gfx.mec.num_pipe = 1;
1381 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1382 
1383 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1384 		r = amdgpu_bo_create(adev,
1385 				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1386 				     PAGE_SIZE, true,
1387 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1388 				     &adev->gfx.mec.hpd_eop_obj);
1389 		if (r) {
1390 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1391 			return r;
1392 		}
1393 	}
1394 
1395 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1396 	if (unlikely(r != 0)) {
1397 		gfx_v8_0_mec_fini(adev);
1398 		return r;
1399 	}
1400 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1401 			  &adev->gfx.mec.hpd_eop_gpu_addr);
1402 	if (r) {
1403 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1404 		gfx_v8_0_mec_fini(adev);
1405 		return r;
1406 	}
1407 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1408 	if (r) {
1409 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1410 		gfx_v8_0_mec_fini(adev);
1411 		return r;
1412 	}
1413 
1414 	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1415 
1416 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1417 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1418 
1419 	return 0;
1420 }
1421 
/* Raw GFX8 shader machine code loaded into an IB and dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds(); presumably touches a wide range
 * of VGPRs to initialize the VGPR SRAMs (name suggests so - exact
 * instruction semantics not verifiable from this file).
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1458 
/* Raw GFX8 shader machine code, companion to vgpr_init_compute_shader;
 * dispatched twice (SGPR1/SGPR2 register sets) by
 * gfx_v8_0_do_edc_gpr_workarounds().  Presumably initializes SGPRs -
 * exact instruction semantics not verifiable from this file.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1483 
/* Register/value pairs (consumed pairwise as SET_SH_REG packets by
 * gfx_v8_0_do_edc_gpr_workarounds()) configuring the VGPR-init
 * compute dispatch: all SEs enabled, 256*4 threads in X.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1503 
/* Register/value pairs for the first SGPR-init dispatch: SE mask 0x0f
 * (lower SEs), 256*5 threads in X.  Consumed pairwise as SET_SH_REG
 * packets by gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1523 
/* Register/value pairs for the second SGPR-init dispatch: identical to
 * sgpr1_init_regs except the SE mask is 0xf0 (the other SEs).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1543 
/* EDC (error detect and correct) counter registers across the GFX
 * blocks (CP, DC, GDS, SPI, SQ/SQC, TCC/TCP, TD).  The consumer is not
 * visible in this chunk; presumably read/cleared as part of the EDC
 * workaround sequence.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1572 
1573 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1574 {
1575 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1576 	struct amdgpu_ib ib;
1577 	struct fence *f = NULL;
1578 	int r, i;
1579 	u32 tmp;
1580 	unsigned total_size, vgpr_offset, sgpr_offset;
1581 	u64 gpu_addr;
1582 
1583 	/* only supported on CZ */
1584 	if (adev->asic_type != CHIP_CARRIZO)
1585 		return 0;
1586 
1587 	/* bail if the compute ring is not ready */
1588 	if (!ring->ready)
1589 		return 0;
1590 
1591 	tmp = RREG32(mmGB_EDC_MODE);
1592 	WREG32(mmGB_EDC_MODE, 0);
1593 
1594 	total_size =
1595 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1596 	total_size +=
1597 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1598 	total_size +=
1599 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1600 	total_size = ALIGN(total_size, 256);
1601 	vgpr_offset = total_size;
1602 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1603 	sgpr_offset = total_size;
1604 	total_size += sizeof(sgpr_init_compute_shader);
1605 
1606 	/* allocate an indirect buffer to put the commands in */
1607 	memset(&ib, 0, sizeof(ib));
1608 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1609 	if (r) {
1610 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1611 		return r;
1612 	}
1613 
1614 	/* load the compute shaders */
1615 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1616 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1617 
1618 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1619 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1620 
1621 	/* init the ib length to 0 */
1622 	ib.length_dw = 0;
1623 
1624 	/* VGPR */
1625 	/* write the register state for the compute dispatch */
1626 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1627 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1628 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1629 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1630 	}
1631 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1632 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1633 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1634 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1635 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1636 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1637 
1638 	/* write dispatch packet */
1639 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1640 	ib.ptr[ib.length_dw++] = 8; /* x */
1641 	ib.ptr[ib.length_dw++] = 1; /* y */
1642 	ib.ptr[ib.length_dw++] = 1; /* z */
1643 	ib.ptr[ib.length_dw++] =
1644 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1645 
1646 	/* write CS partial flush packet */
1647 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1648 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1649 
1650 	/* SGPR1 */
1651 	/* write the register state for the compute dispatch */
1652 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1653 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1654 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1655 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1656 	}
1657 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1658 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1659 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1660 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1661 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1662 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1663 
1664 	/* write dispatch packet */
1665 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1666 	ib.ptr[ib.length_dw++] = 8; /* x */
1667 	ib.ptr[ib.length_dw++] = 1; /* y */
1668 	ib.ptr[ib.length_dw++] = 1; /* z */
1669 	ib.ptr[ib.length_dw++] =
1670 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1671 
1672 	/* write CS partial flush packet */
1673 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1674 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1675 
1676 	/* SGPR2 */
1677 	/* write the register state for the compute dispatch */
1678 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1679 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1680 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1681 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1682 	}
1683 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1684 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1685 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1686 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1687 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1688 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1689 
1690 	/* write dispatch packet */
1691 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1692 	ib.ptr[ib.length_dw++] = 8; /* x */
1693 	ib.ptr[ib.length_dw++] = 1; /* y */
1694 	ib.ptr[ib.length_dw++] = 1; /* z */
1695 	ib.ptr[ib.length_dw++] =
1696 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1697 
1698 	/* write CS partial flush packet */
1699 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1700 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1701 
1702 	/* shedule the ib on the ring */
1703 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1704 	if (r) {
1705 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1706 		goto fail;
1707 	}
1708 
1709 	/* wait for the GPU to finish processing the IB */
1710 	r = fence_wait(f, false);
1711 	if (r) {
1712 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1713 		goto fail;
1714 	}
1715 
1716 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1717 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1718 	WREG32(mmGB_EDC_MODE, tmp);
1719 
1720 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1721 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1722 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1723 
1724 
1725 	/* read back registers to clear the counters */
1726 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1727 		RREG32(sec_ded_counter_registers[i]);
1728 
1729 fail:
1730 	amdgpu_ib_free(adev, &ib, NULL);
1731 	fence_put(f);
1732 
1733 	return r;
1734 }
1735 
1736 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1737 {
1738 	u32 gb_addr_config;
1739 	u32 mc_shared_chmap, mc_arb_ramcfg;
1740 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1741 	u32 tmp;
1742 	int ret;
1743 
1744 	switch (adev->asic_type) {
1745 	case CHIP_TOPAZ:
1746 		adev->gfx.config.max_shader_engines = 1;
1747 		adev->gfx.config.max_tile_pipes = 2;
1748 		adev->gfx.config.max_cu_per_sh = 6;
1749 		adev->gfx.config.max_sh_per_se = 1;
1750 		adev->gfx.config.max_backends_per_se = 2;
1751 		adev->gfx.config.max_texture_channel_caches = 2;
1752 		adev->gfx.config.max_gprs = 256;
1753 		adev->gfx.config.max_gs_threads = 32;
1754 		adev->gfx.config.max_hw_contexts = 8;
1755 
1756 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1757 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1758 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1759 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1760 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1761 		break;
1762 	case CHIP_FIJI:
1763 		adev->gfx.config.max_shader_engines = 4;
1764 		adev->gfx.config.max_tile_pipes = 16;
1765 		adev->gfx.config.max_cu_per_sh = 16;
1766 		adev->gfx.config.max_sh_per_se = 1;
1767 		adev->gfx.config.max_backends_per_se = 4;
1768 		adev->gfx.config.max_texture_channel_caches = 16;
1769 		adev->gfx.config.max_gprs = 256;
1770 		adev->gfx.config.max_gs_threads = 32;
1771 		adev->gfx.config.max_hw_contexts = 8;
1772 
1773 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1774 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1775 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1776 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1777 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1778 		break;
1779 	case CHIP_POLARIS11:
1780 		ret = amdgpu_atombios_get_gfx_info(adev);
1781 		if (ret)
1782 			return ret;
1783 		adev->gfx.config.max_gprs = 256;
1784 		adev->gfx.config.max_gs_threads = 32;
1785 		adev->gfx.config.max_hw_contexts = 8;
1786 
1787 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1788 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1789 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1790 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1791 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1792 		break;
1793 	case CHIP_POLARIS10:
1794 		ret = amdgpu_atombios_get_gfx_info(adev);
1795 		if (ret)
1796 			return ret;
1797 		adev->gfx.config.max_gprs = 256;
1798 		adev->gfx.config.max_gs_threads = 32;
1799 		adev->gfx.config.max_hw_contexts = 8;
1800 
1801 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1802 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1803 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1804 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1805 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1806 		break;
1807 	case CHIP_TONGA:
1808 		adev->gfx.config.max_shader_engines = 4;
1809 		adev->gfx.config.max_tile_pipes = 8;
1810 		adev->gfx.config.max_cu_per_sh = 8;
1811 		adev->gfx.config.max_sh_per_se = 1;
1812 		adev->gfx.config.max_backends_per_se = 2;
1813 		adev->gfx.config.max_texture_channel_caches = 8;
1814 		adev->gfx.config.max_gprs = 256;
1815 		adev->gfx.config.max_gs_threads = 32;
1816 		adev->gfx.config.max_hw_contexts = 8;
1817 
1818 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1819 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1820 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1821 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1822 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1823 		break;
1824 	case CHIP_CARRIZO:
1825 		adev->gfx.config.max_shader_engines = 1;
1826 		adev->gfx.config.max_tile_pipes = 2;
1827 		adev->gfx.config.max_sh_per_se = 1;
1828 		adev->gfx.config.max_backends_per_se = 2;
1829 
1830 		switch (adev->pdev->revision) {
1831 		case 0xc4:
1832 		case 0x84:
1833 		case 0xc8:
1834 		case 0xcc:
1835 		case 0xe1:
1836 		case 0xe3:
1837 			/* B10 */
1838 			adev->gfx.config.max_cu_per_sh = 8;
1839 			break;
1840 		case 0xc5:
1841 		case 0x81:
1842 		case 0x85:
1843 		case 0xc9:
1844 		case 0xcd:
1845 		case 0xe2:
1846 		case 0xe4:
1847 			/* B8 */
1848 			adev->gfx.config.max_cu_per_sh = 6;
1849 			break;
1850 		case 0xc6:
1851 		case 0xca:
1852 		case 0xce:
1853 		case 0x88:
1854 			/* B6 */
1855 			adev->gfx.config.max_cu_per_sh = 6;
1856 			break;
1857 		case 0xc7:
1858 		case 0x87:
1859 		case 0xcb:
1860 		case 0xe5:
1861 		case 0x89:
1862 		default:
1863 			/* B4 */
1864 			adev->gfx.config.max_cu_per_sh = 4;
1865 			break;
1866 		}
1867 
1868 		adev->gfx.config.max_texture_channel_caches = 2;
1869 		adev->gfx.config.max_gprs = 256;
1870 		adev->gfx.config.max_gs_threads = 32;
1871 		adev->gfx.config.max_hw_contexts = 8;
1872 
1873 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1874 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1875 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1876 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1877 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1878 		break;
1879 	case CHIP_STONEY:
1880 		adev->gfx.config.max_shader_engines = 1;
1881 		adev->gfx.config.max_tile_pipes = 2;
1882 		adev->gfx.config.max_sh_per_se = 1;
1883 		adev->gfx.config.max_backends_per_se = 1;
1884 
1885 		switch (adev->pdev->revision) {
1886 		case 0xc0:
1887 		case 0xc1:
1888 		case 0xc2:
1889 		case 0xc4:
1890 		case 0xc8:
1891 		case 0xc9:
1892 			adev->gfx.config.max_cu_per_sh = 3;
1893 			break;
1894 		case 0xd0:
1895 		case 0xd1:
1896 		case 0xd2:
1897 		default:
1898 			adev->gfx.config.max_cu_per_sh = 2;
1899 			break;
1900 		}
1901 
1902 		adev->gfx.config.max_texture_channel_caches = 2;
1903 		adev->gfx.config.max_gprs = 256;
1904 		adev->gfx.config.max_gs_threads = 16;
1905 		adev->gfx.config.max_hw_contexts = 8;
1906 
1907 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1908 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1909 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1910 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1911 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1912 		break;
1913 	default:
1914 		adev->gfx.config.max_shader_engines = 2;
1915 		adev->gfx.config.max_tile_pipes = 4;
1916 		adev->gfx.config.max_cu_per_sh = 2;
1917 		adev->gfx.config.max_sh_per_se = 1;
1918 		adev->gfx.config.max_backends_per_se = 2;
1919 		adev->gfx.config.max_texture_channel_caches = 4;
1920 		adev->gfx.config.max_gprs = 256;
1921 		adev->gfx.config.max_gs_threads = 32;
1922 		adev->gfx.config.max_hw_contexts = 8;
1923 
1924 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1925 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1926 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1927 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1928 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1929 		break;
1930 	}
1931 
1932 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1933 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1934 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1935 
1936 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1937 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1938 	if (adev->flags & AMD_IS_APU) {
1939 		/* Get memory bank mapping mode. */
1940 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1941 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1942 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1943 
1944 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1945 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1946 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1947 
1948 		/* Validate settings in case only one DIMM installed. */
1949 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1950 			dimm00_addr_map = 0;
1951 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1952 			dimm01_addr_map = 0;
1953 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1954 			dimm10_addr_map = 0;
1955 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1956 			dimm11_addr_map = 0;
1957 
1958 		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1959 		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1960 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1961 			adev->gfx.config.mem_row_size_in_kb = 2;
1962 		else
1963 			adev->gfx.config.mem_row_size_in_kb = 1;
1964 	} else {
1965 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1966 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1967 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1968 			adev->gfx.config.mem_row_size_in_kb = 4;
1969 	}
1970 
1971 	adev->gfx.config.shader_engine_tile_size = 32;
1972 	adev->gfx.config.num_gpus = 1;
1973 	adev->gfx.config.multi_gpu_tile_size = 64;
1974 
1975 	/* fix up row size */
1976 	switch (adev->gfx.config.mem_row_size_in_kb) {
1977 	case 1:
1978 	default:
1979 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1980 		break;
1981 	case 2:
1982 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1983 		break;
1984 	case 4:
1985 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1986 		break;
1987 	}
1988 	adev->gfx.config.gb_addr_config = gb_addr_config;
1989 
1990 	return 0;
1991 }
1992 
/*
 * gfx_v8_0_sw_init - software-side initialization of the gfx v8 IP block
 *
 * Registers the gfx interrupt sources, loads the gfx microcode, allocates
 * the RLC and MEC buffer objects, initializes the gfx and compute rings,
 * and reserves the GDS/GWS/OA partitions for gfx use.  The ordering here
 * matters: rings depend on the EOP irq source, and gpu_early_init runs
 * last to derive the per-ASIC gfx configuration.
 *
 * @handle: amdgpu_device pointer cast to void* (IP-block callback contract)
 *
 * Returns 0 on success or a negative error code from any init step; each
 * step's error is propagated immediately without unwinding earlier steps
 * (sw_fini handles teardown).
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* 8 queues per pipe: ring index i maps to pipe i/8, queue i%8 */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* 32KB of constant engine RAM */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2109 
2110 static int gfx_v8_0_sw_fini(void *handle)
2111 {
2112 	int i;
2113 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2114 
2115 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2116 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2117 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2118 
2119 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2120 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2121 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2122 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2123 
2124 	gfx_v8_0_mec_fini(adev);
2125 	gfx_v8_0_rlc_fini(adev);
2126 	gfx_v8_0_free_microcode(adev);
2127 
2128 	return 0;
2129 }
2130 
2131 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2132 {
2133 	uint32_t *modearray, *mod2array;
2134 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2135 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2136 	u32 reg_offset;
2137 
2138 	modearray = adev->gfx.config.tile_mode_array;
2139 	mod2array = adev->gfx.config.macrotile_mode_array;
2140 
2141 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2142 		modearray[reg_offset] = 0;
2143 
2144 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2145 		mod2array[reg_offset] = 0;
2146 
2147 	switch (adev->asic_type) {
2148 	case CHIP_TOPAZ:
2149 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2150 				PIPE_CONFIG(ADDR_SURF_P2) |
2151 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2152 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2153 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2154 				PIPE_CONFIG(ADDR_SURF_P2) |
2155 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2156 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2157 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2158 				PIPE_CONFIG(ADDR_SURF_P2) |
2159 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2160 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2161 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2162 				PIPE_CONFIG(ADDR_SURF_P2) |
2163 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2164 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2165 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2166 				PIPE_CONFIG(ADDR_SURF_P2) |
2167 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2168 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2169 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2170 				PIPE_CONFIG(ADDR_SURF_P2) |
2171 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2172 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2173 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2174 				PIPE_CONFIG(ADDR_SURF_P2) |
2175 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2176 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2177 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2178 				PIPE_CONFIG(ADDR_SURF_P2));
2179 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2180 				PIPE_CONFIG(ADDR_SURF_P2) |
2181 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2182 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2184 				 PIPE_CONFIG(ADDR_SURF_P2) |
2185 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2186 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2188 				 PIPE_CONFIG(ADDR_SURF_P2) |
2189 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2190 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2191 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2192 				 PIPE_CONFIG(ADDR_SURF_P2) |
2193 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2194 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2195 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2196 				 PIPE_CONFIG(ADDR_SURF_P2) |
2197 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2198 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2199 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2200 				 PIPE_CONFIG(ADDR_SURF_P2) |
2201 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2202 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2203 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2204 				 PIPE_CONFIG(ADDR_SURF_P2) |
2205 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2206 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2207 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2208 				 PIPE_CONFIG(ADDR_SURF_P2) |
2209 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2210 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2211 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2212 				 PIPE_CONFIG(ADDR_SURF_P2) |
2213 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2214 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2215 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2216 				 PIPE_CONFIG(ADDR_SURF_P2) |
2217 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2218 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2219 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2220 				 PIPE_CONFIG(ADDR_SURF_P2) |
2221 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2222 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2223 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2224 				 PIPE_CONFIG(ADDR_SURF_P2) |
2225 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2226 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2227 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2228 				 PIPE_CONFIG(ADDR_SURF_P2) |
2229 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2230 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2231 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2232 				 PIPE_CONFIG(ADDR_SURF_P2) |
2233 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2234 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2235 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2236 				 PIPE_CONFIG(ADDR_SURF_P2) |
2237 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2238 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2239 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2240 				 PIPE_CONFIG(ADDR_SURF_P2) |
2241 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2242 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2243 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2244 				 PIPE_CONFIG(ADDR_SURF_P2) |
2245 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2246 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2247 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2248 				 PIPE_CONFIG(ADDR_SURF_P2) |
2249 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2250 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2251 
2252 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2253 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2254 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255 				NUM_BANKS(ADDR_SURF_8_BANK));
2256 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2257 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2258 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2259 				NUM_BANKS(ADDR_SURF_8_BANK));
2260 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2261 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2262 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2263 				NUM_BANKS(ADDR_SURF_8_BANK));
2264 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2267 				NUM_BANKS(ADDR_SURF_8_BANK));
2268 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2270 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2271 				NUM_BANKS(ADDR_SURF_8_BANK));
2272 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2274 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2275 				NUM_BANKS(ADDR_SURF_8_BANK));
2276 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2279 				NUM_BANKS(ADDR_SURF_8_BANK));
2280 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2281 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2282 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2283 				NUM_BANKS(ADDR_SURF_16_BANK));
2284 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2285 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287 				NUM_BANKS(ADDR_SURF_16_BANK));
2288 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2289 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291 				 NUM_BANKS(ADDR_SURF_16_BANK));
2292 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2293 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2294 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2295 				 NUM_BANKS(ADDR_SURF_16_BANK));
2296 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2298 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2299 				 NUM_BANKS(ADDR_SURF_16_BANK));
2300 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2302 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2303 				 NUM_BANKS(ADDR_SURF_16_BANK));
2304 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2306 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2307 				 NUM_BANKS(ADDR_SURF_8_BANK));
2308 
2309 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2310 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2311 			    reg_offset != 23)
2312 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2313 
2314 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2315 			if (reg_offset != 7)
2316 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2317 
2318 		break;
2319 	case CHIP_FIJI:
2320 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2323 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2324 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2327 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2328 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2331 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2332 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2335 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2336 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2339 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2340 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2343 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2344 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2347 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2350 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2351 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2352 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2353 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2354 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2355 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2356 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2357 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2358 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2359 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2362 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2363 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2366 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2367 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2368 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2369 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2370 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2371 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2373 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2377 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2378 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2379 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2381 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2383 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2385 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2386 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2387 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2388 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2389 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2390 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2391 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2394 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2395 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2397 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2398 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2399 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2401 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2402 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2403 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2405 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2406 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2407 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2409 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2410 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2411 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2412 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2413 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2414 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2415 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2417 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2418 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2419 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2421 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2422 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2423 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2425 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2426 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2427 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2430 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2437 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2438 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2440 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2442 
2443 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2445 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2446 				NUM_BANKS(ADDR_SURF_8_BANK));
2447 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2449 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2450 				NUM_BANKS(ADDR_SURF_8_BANK));
2451 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2453 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454 				NUM_BANKS(ADDR_SURF_8_BANK));
2455 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2457 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2458 				NUM_BANKS(ADDR_SURF_8_BANK));
2459 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2461 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2462 				NUM_BANKS(ADDR_SURF_8_BANK));
2463 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2465 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2466 				NUM_BANKS(ADDR_SURF_8_BANK));
2467 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470 				NUM_BANKS(ADDR_SURF_8_BANK));
2471 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2473 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2474 				NUM_BANKS(ADDR_SURF_8_BANK));
2475 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2477 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2478 				NUM_BANKS(ADDR_SURF_8_BANK));
2479 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2481 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2482 				 NUM_BANKS(ADDR_SURF_8_BANK));
2483 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2485 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2486 				 NUM_BANKS(ADDR_SURF_8_BANK));
2487 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2489 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2490 				 NUM_BANKS(ADDR_SURF_8_BANK));
2491 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2493 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2494 				 NUM_BANKS(ADDR_SURF_8_BANK));
2495 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2498 				 NUM_BANKS(ADDR_SURF_4_BANK));
2499 
2500 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2501 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2502 
2503 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2504 			if (reg_offset != 7)
2505 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2506 
2507 		break;
	case CHIP_TONGA:
		/*
		 * GB_TILE_MODE0..30 values for Tonga.  Entries 0-7 use
		 * ADDR_SURF_DEPTH_MICRO_TILING with a rising TILE_SPLIT
		 * (64B..2KB), entry 8 is the linear-aligned mode, and
		 * entries 9-30 cover display/thin/thick/rotated micro
		 * tiling.  Most entries use the ADDR_SURF_P8_32x32_16x16
		 * pipe config; the PRT variants at [7], [12], [17], [23]
		 * and [30] fall back to ADDR_SURF_P4_16x16.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 values.  Index 7 is deliberately
		 * never initialized here, and the write loop below skips
		 * it, leaving GB_MACROTILE_MODE7 at its existing value.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the per-mode tile registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile registers; MODE7 is left untouched. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
		/*
		 * GB_TILE_MODE0..30 values for Polaris11.  Same layout as
		 * the other VI parts — entries 0-7 are depth micro tiling
		 * with a rising TILE_SPLIT (64B..2KB), entry 8 is the
		 * linear-aligned mode, entries 9-30 are
		 * display/thin/thick/rotated micro tiling — but every
		 * entry uses the ADDR_SURF_P4_16x16 pipe config.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 values.  As in the other case
		 * arms, index 7 is deliberately never initialized and the
		 * write loop below skips it, leaving GB_MACROTILE_MODE7
		 * at its existing value.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the per-mode tile registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile registers; MODE7 is left untouched. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2899 	case CHIP_POLARIS10:
2900 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2903 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2907 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2911 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2915 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2921 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2930 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2931 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2932 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2933 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2934 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2935 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2947 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2948 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2950 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2951 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2955 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2959 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2964 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2968 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2970 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2975 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2979 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2983 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2991 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2992 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2993 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2995 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2997 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2999 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3003 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3005 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3006 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3007 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3011 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3014 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3016 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3018 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3019 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3020 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3021 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3022 
3023 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026 				NUM_BANKS(ADDR_SURF_16_BANK));
3027 
3028 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031 				NUM_BANKS(ADDR_SURF_16_BANK));
3032 
3033 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036 				NUM_BANKS(ADDR_SURF_16_BANK));
3037 
3038 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3040 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3041 				NUM_BANKS(ADDR_SURF_16_BANK));
3042 
3043 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3045 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3046 				NUM_BANKS(ADDR_SURF_16_BANK));
3047 
3048 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3050 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3051 				NUM_BANKS(ADDR_SURF_16_BANK));
3052 
3053 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3056 				NUM_BANKS(ADDR_SURF_16_BANK));
3057 
3058 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3060 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3061 				NUM_BANKS(ADDR_SURF_16_BANK));
3062 
3063 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3065 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066 				NUM_BANKS(ADDR_SURF_16_BANK));
3067 
3068 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3070 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071 				NUM_BANKS(ADDR_SURF_16_BANK));
3072 
3073 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076 				NUM_BANKS(ADDR_SURF_16_BANK));
3077 
3078 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3081 				NUM_BANKS(ADDR_SURF_8_BANK));
3082 
3083 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086 				NUM_BANKS(ADDR_SURF_4_BANK));
3087 
3088 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3091 				NUM_BANKS(ADDR_SURF_4_BANK));
3092 
3093 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3094 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3095 
3096 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3097 			if (reg_offset != 7)
3098 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3099 
3100 		break;
3101 	case CHIP_STONEY:
3102 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103 				PIPE_CONFIG(ADDR_SURF_P2) |
3104 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3105 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107 				PIPE_CONFIG(ADDR_SURF_P2) |
3108 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3109 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111 				PIPE_CONFIG(ADDR_SURF_P2) |
3112 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3113 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115 				PIPE_CONFIG(ADDR_SURF_P2) |
3116 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3117 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119 				PIPE_CONFIG(ADDR_SURF_P2) |
3120 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3123 				PIPE_CONFIG(ADDR_SURF_P2) |
3124 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3127 				PIPE_CONFIG(ADDR_SURF_P2) |
3128 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3129 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3130 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3131 				PIPE_CONFIG(ADDR_SURF_P2));
3132 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3133 				PIPE_CONFIG(ADDR_SURF_P2) |
3134 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3137 				 PIPE_CONFIG(ADDR_SURF_P2) |
3138 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3140 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3141 				 PIPE_CONFIG(ADDR_SURF_P2) |
3142 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3143 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3144 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3145 				 PIPE_CONFIG(ADDR_SURF_P2) |
3146 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3149 				 PIPE_CONFIG(ADDR_SURF_P2) |
3150 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3153 				 PIPE_CONFIG(ADDR_SURF_P2) |
3154 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3156 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3157 				 PIPE_CONFIG(ADDR_SURF_P2) |
3158 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3160 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161 				 PIPE_CONFIG(ADDR_SURF_P2) |
3162 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3163 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3165 				 PIPE_CONFIG(ADDR_SURF_P2) |
3166 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3169 				 PIPE_CONFIG(ADDR_SURF_P2) |
3170 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3173 				 PIPE_CONFIG(ADDR_SURF_P2) |
3174 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3177 				 PIPE_CONFIG(ADDR_SURF_P2) |
3178 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3179 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3181 				 PIPE_CONFIG(ADDR_SURF_P2) |
3182 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3185 				 PIPE_CONFIG(ADDR_SURF_P2) |
3186 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3189 				 PIPE_CONFIG(ADDR_SURF_P2) |
3190 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3191 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3192 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3193 				 PIPE_CONFIG(ADDR_SURF_P2) |
3194 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197 				 PIPE_CONFIG(ADDR_SURF_P2) |
3198 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201 				 PIPE_CONFIG(ADDR_SURF_P2) |
3202 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3203 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3204 
3205 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3207 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208 				NUM_BANKS(ADDR_SURF_8_BANK));
3209 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3211 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3212 				NUM_BANKS(ADDR_SURF_8_BANK));
3213 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216 				NUM_BANKS(ADDR_SURF_8_BANK));
3217 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220 				NUM_BANKS(ADDR_SURF_8_BANK));
3221 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224 				NUM_BANKS(ADDR_SURF_8_BANK));
3225 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228 				NUM_BANKS(ADDR_SURF_8_BANK));
3229 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3230 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3231 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3232 				NUM_BANKS(ADDR_SURF_8_BANK));
3233 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3235 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236 				NUM_BANKS(ADDR_SURF_16_BANK));
3237 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3238 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240 				NUM_BANKS(ADDR_SURF_16_BANK));
3241 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3243 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244 				 NUM_BANKS(ADDR_SURF_16_BANK));
3245 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248 				 NUM_BANKS(ADDR_SURF_16_BANK));
3249 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3251 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252 				 NUM_BANKS(ADDR_SURF_16_BANK));
3253 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3256 				 NUM_BANKS(ADDR_SURF_16_BANK));
3257 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3260 				 NUM_BANKS(ADDR_SURF_8_BANK));
3261 
3262 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3263 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3264 			    reg_offset != 23)
3265 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3266 
3267 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3268 			if (reg_offset != 7)
3269 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3270 
3271 		break;
3272 	default:
3273 		dev_warn(adev->dev,
3274 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3275 			 adev->asic_type);
3276 
3277 	case CHIP_CARRIZO:
3278 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279 				PIPE_CONFIG(ADDR_SURF_P2) |
3280 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3281 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283 				PIPE_CONFIG(ADDR_SURF_P2) |
3284 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3285 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287 				PIPE_CONFIG(ADDR_SURF_P2) |
3288 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3289 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291 				PIPE_CONFIG(ADDR_SURF_P2) |
3292 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3293 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3295 				PIPE_CONFIG(ADDR_SURF_P2) |
3296 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3297 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3298 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3299 				PIPE_CONFIG(ADDR_SURF_P2) |
3300 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3301 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3302 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3303 				PIPE_CONFIG(ADDR_SURF_P2) |
3304 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3305 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3306 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3307 				PIPE_CONFIG(ADDR_SURF_P2));
3308 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3309 				PIPE_CONFIG(ADDR_SURF_P2) |
3310 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3311 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313 				 PIPE_CONFIG(ADDR_SURF_P2) |
3314 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3315 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3317 				 PIPE_CONFIG(ADDR_SURF_P2) |
3318 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3319 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3320 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3321 				 PIPE_CONFIG(ADDR_SURF_P2) |
3322 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3324 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3325 				 PIPE_CONFIG(ADDR_SURF_P2) |
3326 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3329 				 PIPE_CONFIG(ADDR_SURF_P2) |
3330 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3332 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3333 				 PIPE_CONFIG(ADDR_SURF_P2) |
3334 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3335 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3336 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3337 				 PIPE_CONFIG(ADDR_SURF_P2) |
3338 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3339 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3341 				 PIPE_CONFIG(ADDR_SURF_P2) |
3342 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3345 				 PIPE_CONFIG(ADDR_SURF_P2) |
3346 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3347 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3349 				 PIPE_CONFIG(ADDR_SURF_P2) |
3350 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3353 				 PIPE_CONFIG(ADDR_SURF_P2) |
3354 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3357 				 PIPE_CONFIG(ADDR_SURF_P2) |
3358 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3359 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3360 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3361 				 PIPE_CONFIG(ADDR_SURF_P2) |
3362 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3363 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3364 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3365 				 PIPE_CONFIG(ADDR_SURF_P2) |
3366 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3367 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3368 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3369 				 PIPE_CONFIG(ADDR_SURF_P2) |
3370 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3371 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3372 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3373 				 PIPE_CONFIG(ADDR_SURF_P2) |
3374 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3375 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3376 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3377 				 PIPE_CONFIG(ADDR_SURF_P2) |
3378 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3379 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3380 
3381 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3383 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384 				NUM_BANKS(ADDR_SURF_8_BANK));
3385 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3387 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388 				NUM_BANKS(ADDR_SURF_8_BANK));
3389 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392 				NUM_BANKS(ADDR_SURF_8_BANK));
3393 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396 				NUM_BANKS(ADDR_SURF_8_BANK));
3397 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400 				NUM_BANKS(ADDR_SURF_8_BANK));
3401 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3404 				NUM_BANKS(ADDR_SURF_8_BANK));
3405 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408 				NUM_BANKS(ADDR_SURF_8_BANK));
3409 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3410 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3411 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412 				NUM_BANKS(ADDR_SURF_16_BANK));
3413 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3414 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3415 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416 				NUM_BANKS(ADDR_SURF_16_BANK));
3417 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3418 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3419 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420 				 NUM_BANKS(ADDR_SURF_16_BANK));
3421 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3422 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3423 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3424 				 NUM_BANKS(ADDR_SURF_16_BANK));
3425 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3426 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3427 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3428 				 NUM_BANKS(ADDR_SURF_16_BANK));
3429 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3431 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3432 				 NUM_BANKS(ADDR_SURF_16_BANK));
3433 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3434 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3435 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3436 				 NUM_BANKS(ADDR_SURF_8_BANK));
3437 
3438 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3439 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3440 			    reg_offset != 23)
3441 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3442 
3443 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3444 			if (reg_offset != 7)
3445 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3446 
3447 		break;
3448 	}
3449 }
3450 
3451 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3452 				  u32 se_num, u32 sh_num, u32 instance)
3453 {
3454 	u32 data;
3455 
3456 	if (instance == 0xffffffff)
3457 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3458 	else
3459 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3460 
3461 	if (se_num == 0xffffffff)
3462 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3463 	else
3464 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3465 
3466 	if (sh_num == 0xffffffff)
3467 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3468 	else
3469 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3470 
3471 	WREG32(mmGRBM_GFX_INDEX, data);
3472 }
3473 
/*
 * Return a mask with the low @bit_width bits set.  The shift is done in
 * 64 bits so that bit_width == 32 yields 0xffffffff instead of invoking
 * undefined behavior.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	u64 mask = (1ULL << bit_width) - 1ULL;

	return (u32)mask;
}
3478 
3479 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3480 {
3481 	u32 data, mask;
3482 
3483 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3484 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3485 
3486 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3487 
3488 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3489 				       adev->gfx.config.max_sh_per_se);
3490 
3491 	return (~data) & mask;
3492 }
3493 
3494 static void
3495 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3496 {
3497 	switch (adev->asic_type) {
3498 	case CHIP_FIJI:
3499 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3500 			  RB_XSEL2(1) | PKR_MAP(2) |
3501 			  PKR_XSEL(1) | PKR_YSEL(1) |
3502 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3503 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3504 			   SE_PAIR_YSEL(2);
3505 		break;
3506 	case CHIP_TONGA:
3507 	case CHIP_POLARIS10:
3508 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3509 			  SE_XSEL(1) | SE_YSEL(1);
3510 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3511 			   SE_PAIR_YSEL(2);
3512 		break;
3513 	case CHIP_TOPAZ:
3514 	case CHIP_CARRIZO:
3515 		*rconf |= RB_MAP_PKR0(2);
3516 		*rconf1 |= 0x0;
3517 		break;
3518 	case CHIP_POLARIS11:
3519 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3520 			  SE_XSEL(1) | SE_YSEL(1);
3521 		*rconf1 |= 0x0;
3522 		break;
3523 	case CHIP_STONEY:
3524 		*rconf |= 0x0;
3525 		*rconf1 |= 0x0;
3526 		break;
3527 	default:
3528 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3529 		break;
3530 	}
3531 }
3532 
/*
 * Program PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 separately for each
 * shader engine when some render backends (RBs) have been harvested, so
 * that every SE's mapping only routes to RBs present in @rb_mask.
 *
 * @raster_config:   fully-populated PA_SC_RASTER_CONFIG value
 * @raster_config_1: fully-populated PA_SC_RASTER_CONFIG_1 value
 * @rb_mask:         bitmap of enabled RBs, packed per-SE
 * @num_rb:          total number of RB pipes being considered
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into consecutive rb_per_se-wide chunks, one per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* The mapping below only handles these hardware configurations. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/*
	 * If one whole SE pair (0/1 or 2/3) has no RBs at all, steer the
	 * SE-pair mapping toward the pair that still has working RBs.
	 */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* index of this SE's pair partner base */

		/* If one SE of the pair is empty, remap SE_MAP to the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: remap between the two packers. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* And again within each packer: remap between its two RBs. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3641 
/*
 * Probe which render backends are active on every SE/SH, record the result
 * in adev->gfx.config, and program the raster configuration registers -
 * broadcast when no RBs are harvested, per-SE otherwise.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* number of RB bits each SE/SH pair contributes to active_rbs */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Visit every SE/SH and pack its active-RB bitmap into active_rbs. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* Restore broadcast mode before the register writes below. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/*
	 * Fully populated (or nothing enabled at all): one broadcast write
	 * suffices.  Otherwise write a harvest-aware config per SE.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	mutex_unlock(&adev->grbm_idx_mutex);
}
3683 
3684 /**
3685  * gfx_v8_0_init_compute_vmid - gart enable
3686  *
3687  * @rdev: amdgpu_device pointer
3688  *
3689  * Initialize compute vmid sh_mem registers
3690  *
3691  */
3692 #define DEFAULT_SH_MEM_BASES	(0x6000)
3693 #define FIRST_COMPUTE_VMID	(8)
3694 #define LAST_COMPUTE_VMID	(16)
3695 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3696 {
3697 	int i;
3698 	uint32_t sh_mem_config;
3699 	uint32_t sh_mem_bases;
3700 
3701 	/*
3702 	 * Configure apertures:
3703 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3704 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3705 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3706 	 */
3707 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3708 
3709 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3710 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3711 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3712 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3713 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3714 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3715 
3716 	mutex_lock(&adev->srbm_mutex);
3717 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3718 		vi_srbm_select(adev, 0, 0, 0, i);
3719 		/* CP and shaders */
3720 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3721 		WREG32(mmSH_MEM_APE1_BASE, 1);
3722 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3723 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3724 	}
3725 	vi_srbm_select(adev, 0, 0, 0, 0);
3726 	mutex_unlock(&adev->srbm_mutex);
3727 }
3728 
/*
 * gfx_v8_0_gpu_init - one-time static setup of the gfx block
 *
 * Programs the address/tiling configuration, the per-VMID SH_MEM apertures
 * and the scan-converter FIFO sizes.  Called during hw init before the CP
 * and RLC are brought up.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	/* lengthen the GRBM read timeout before programming the rest */
	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* mirror the same tiling/bank layout into GB, HDP and DMIF */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	/* program the SH_MEM apertures for each of the 16 VMIDs */
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel/CP): uncached default memory type */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* user VMIDs: non-coherent cached default memory type */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		/* base > limit disables the APE1 aperture */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	/* partition the scan-converter FIFO between its four consumers */
	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
3791 
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes links to go idle
 *
 * First polls the per-CU master-busy status for every SE/SH combination
 * (bounded by adev->usec_timeout per combination), then polls the non-CU
 * (SE/GC/TC) master-busy bits.  Times out silently; no error is returned.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			/* target this specific SE/SH for the status read */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode before dropping the lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3821 
3822 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3823 					       bool enable)
3824 {
3825 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3826 
3827 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3828 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3829 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3830 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3831 
3832 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3833 }
3834 
3835 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3836 {
3837 	/* csib */
3838 	WREG32(mmRLC_CSIB_ADDR_HI,
3839 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3840 	WREG32(mmRLC_CSIB_ADDR_LO,
3841 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3842 	WREG32(mmRLC_CSIB_LENGTH,
3843 			adev->gfx.rlc.clear_state_size);
3844 }
3845 
3846 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3847 				int ind_offset,
3848 				int list_size,
3849 				int *unique_indices,
3850 				int *indices_count,
3851 				int max_indices,
3852 				int *ind_start_offsets,
3853 				int *offset_count,
3854 				int max_offset)
3855 {
3856 	int indices;
3857 	bool new_entry = true;
3858 
3859 	for (; ind_offset < list_size; ind_offset++) {
3860 
3861 		if (new_entry) {
3862 			new_entry = false;
3863 			ind_start_offsets[*offset_count] = ind_offset;
3864 			*offset_count = *offset_count + 1;
3865 			BUG_ON(*offset_count >= max_offset);
3866 		}
3867 
3868 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3869 			new_entry = true;
3870 			continue;
3871 		}
3872 
3873 		ind_offset += 2;
3874 
3875 		/* look for the matching indice */
3876 		for (indices = 0;
3877 			indices < *indices_count;
3878 			indices++) {
3879 			if (unique_indices[indices] ==
3880 				register_list_format[ind_offset])
3881 				break;
3882 		}
3883 
3884 		if (indices >= *indices_count) {
3885 			unique_indices[*indices_count] =
3886 				register_list_format[ind_offset];
3887 			indices = *indices_count;
3888 			*indices_count = *indices_count + 1;
3889 			BUG_ON(*indices_count >= max_indices);
3890 		}
3891 
3892 		register_list_format[ind_offset] = indices;
3893 	}
3894 }
3895 
/*
 * gfx_v8_0_init_save_restore_list - program the RLC save/restore machine
 *
 * Copies the firmware-provided register-list-format blob, parses it into
 * unique index registers and per-sub-list start offsets, then loads the
 * direct restore list into ARAM and the indirect list, its size and the
 * start offsets into GPM scratch.  Finally programs the index control
 * register pairs.
 *
 * Returns 0 on success, -ENOMEM if the scratch copy cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites index entries in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (register_list_format == NULL)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* stream the direct restore list into SRM ARAM */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore-list size is stored in units of register pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	/*
	 * NOTE(review): ADDR gets the low 18 bits while DATA gets bits 20+
	 * of the same word — presumably matches the ucode's packed layout;
	 * confirm against the RLC SRM format spec.
	 */
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
	}
	kfree(register_list_format);

	return 0;
}
3957 
/* Turn on the RLC save/restore machine once its lists have been loaded. */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3962 
/*
 * gfx_v8_0_init_power_gating - program GFX power-gating delay parameters
 *
 * Only takes effect when static/dynamic/smart gfx power gating is
 * supported; otherwise a no-op.  The 0x10/0x3/0x55f0 values are
 * hardware-tuning constants carried over from the reference programming
 * sequence.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		/* power up/down and propagation delays */
		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
	}
}
3982 
3983 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3984 						bool enable)
3985 {
3986 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3987 }
3988 
3989 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3990 						  bool enable)
3991 {
3992 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3993 }
3994 
3995 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3996 {
3997 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
3998 }
3999 
/*
 * gfx_v8_0_init_pg - set up power gating before the RLC starts
 *
 * When any gfx PG feature is supported: loads the CSB pointer and the
 * save/restore lists, enables the SRM, and on APUs (Carrizo/Stoney)
 * additionally programs the jump-table address, the always-on CU mask
 * and the SMU clock slow-down / CP power-gating knobs.  Polaris11 only
 * gets the delay-parameter programming.
 */
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);

		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_STONEY)) {
			/* jump table lives in the CP table buffer (256B units) */
			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
			gfx_v8_0_init_power_gating(adev);
			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
				cz_enable_sck_slow_down_on_power_up(adev, true);
				cz_enable_sck_slow_down_on_power_down(adev, true);
			} else {
				cz_enable_sck_slow_down_on_power_up(adev, false);
				cz_enable_sck_slow_down_on_power_down(adev, false);
			}
			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
				cz_enable_cp_power_gating(adev, true);
			else
				cz_enable_cp_power_gating(adev, false);
		} else if (adev->asic_type == CHIP_POLARIS11) {
			gfx_v8_0_init_power_gating(adev);
		}
	}
}
4033 
/*
 * gfx_v8_0_rlc_stop - halt the RLC microengine
 *
 * Clears the F32 enable bit, masks the gui-idle interrupts and waits for
 * the serdes links to drain before returning.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4041 
/*
 * gfx_v8_0_rlc_reset - pulse the RLC soft reset
 *
 * Asserts and then deasserts GRBM SOFT_RESET_RLC, with a 50us settle
 * delay on each edge.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4050 
/*
 * gfx_v8_0_rlc_start - start the RLC microengine
 *
 * Sets the F32 enable bit.  On dGPUs the gui-idle interrupts are
 * re-enabled here; on APUs that happens later, after the CP is up.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4061 
/*
 * gfx_v8_0_rlc_load_microcode - stream the RLC firmware into GPM ucode RAM
 *
 * Writes the ucode payload word by word through the ADDR/DATA register
 * pair (address auto-increments), then writes the driver's firmware
 * version back to the ADDR register per the programming sequence.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware has been loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* payload starts after the header at ucode_array_offset_bytes */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4085 
/*
 * gfx_v8_0_rlc_resume - full stop/reset/reload/start cycle for the RLC
 *
 * Stops the RLC, disables clockgating and powergating, resets the
 * engine, reprograms PG state and (when the SMU is not handling firmware
 * loads) loads the RLC microcode before starting the engine again.
 *
 * Returns 0 on success or a negative error code from the firmware load.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		/* Polaris also has a 3D CGCG/CGLS control to clear */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4129 
4130 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4131 {
4132 	int i;
4133 	u32 tmp = RREG32(mmCP_ME_CNTL);
4134 
4135 	if (enable) {
4136 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4137 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4138 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4139 	} else {
4140 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4141 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4142 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4143 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4144 			adev->gfx.gfx_ring[i].ready = false;
4145 	}
4146 	WREG32(mmCP_ME_CNTL, tmp);
4147 	udelay(50);
4148 }
4149 
4150 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4151 {
4152 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4153 	const struct gfx_firmware_header_v1_0 *ce_hdr;
4154 	const struct gfx_firmware_header_v1_0 *me_hdr;
4155 	const __le32 *fw_data;
4156 	unsigned i, fw_size;
4157 
4158 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4159 		return -EINVAL;
4160 
4161 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4162 		adev->gfx.pfp_fw->data;
4163 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4164 		adev->gfx.ce_fw->data;
4165 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4166 		adev->gfx.me_fw->data;
4167 
4168 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4169 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4170 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4171 
4172 	gfx_v8_0_cp_gfx_enable(adev, false);
4173 
4174 	/* PFP */
4175 	fw_data = (const __le32 *)
4176 		(adev->gfx.pfp_fw->data +
4177 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4178 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4179 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4180 	for (i = 0; i < fw_size; i++)
4181 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4182 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4183 
4184 	/* CE */
4185 	fw_data = (const __le32 *)
4186 		(adev->gfx.ce_fw->data +
4187 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4188 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4189 	WREG32(mmCP_CE_UCODE_ADDR, 0);
4190 	for (i = 0; i < fw_size; i++)
4191 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4192 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4193 
4194 	/* ME */
4195 	fw_data = (const __le32 *)
4196 		(adev->gfx.me_fw->data +
4197 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4198 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4199 	WREG32(mmCP_ME_RAM_WADDR, 0);
4200 	for (i = 0; i < fw_size; i++)
4201 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4202 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4203 
4204 	return 0;
4205 }
4206 
4207 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4208 {
4209 	u32 count = 0;
4210 	const struct cs_section_def *sect = NULL;
4211 	const struct cs_extent_def *ext = NULL;
4212 
4213 	/* begin clear state */
4214 	count += 2;
4215 	/* context control state */
4216 	count += 3;
4217 
4218 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4219 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4220 			if (sect->id == SECT_CONTEXT)
4221 				count += 2 + ext->reg_count;
4222 			else
4223 				return 0;
4224 		}
4225 	}
4226 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4227 	count += 4;
4228 	/* end clear state */
4229 	count += 2;
4230 	/* clear state */
4231 	count += 2;
4232 
4233 	return count;
4234 }
4235 
/*
 * gfx_v8_0_cp_gfx_start - initialize the gfx CP and emit the clear state
 *
 * Programs the CP context limits, releases the front ends, then submits
 * the clear-state packet stream (sized by gfx_v8_0_get_csb_size()) plus
 * the per-ASIC raster configuration and the CE partition setup on ring 0.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the trailing SET_BASE packet */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the static table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/CONFIG_1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		/* unhandled ASIC: a wrong raster config is unrecoverable */
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4327 
/*
 * gfx_v8_0_cp_gfx_resume - bring up gfx ring 0
 *
 * Programs the ring buffer size, pointers, writeback address, base
 * address and (except on Topaz) the doorbell configuration, then starts
 * the ring and runs a ring test.
 *
 * Returns 0 on success or the ring-test error; on failure the ring is
 * marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the rptr-write-enable pulse settle, then restore CNTL */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in 256-byte units */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			/* restrict the doorbell aperture to the gfx ring */
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4408 
4409 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4410 {
4411 	int i;
4412 
4413 	if (enable) {
4414 		WREG32(mmCP_MEC_CNTL, 0);
4415 	} else {
4416 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4417 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4418 			adev->gfx.compute_ring[i].ready = false;
4419 	}
4420 	udelay(50);
4421 }
4422 
/*
 * gfx_v8_0_cp_compute_load_microcode - load the MEC (compute CP) firmware
 *
 * Halts the compute MEs, streams the MEC1 image, and optionally streams a
 * separate MEC2 image when one was fetched.
 *
 * Returns 0 on success, -EINVAL if the MEC1 firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* the MEs must be halted while their ucode RAM is rewritten */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	/* payload starts after the header at ucode_array_offset_bytes */
	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4468 
4469 struct vi_mqd {
4470 	uint32_t header;  /* ordinal0 */
4471 	uint32_t compute_dispatch_initiator;  /* ordinal1 */
4472 	uint32_t compute_dim_x;  /* ordinal2 */
4473 	uint32_t compute_dim_y;  /* ordinal3 */
4474 	uint32_t compute_dim_z;  /* ordinal4 */
4475 	uint32_t compute_start_x;  /* ordinal5 */
4476 	uint32_t compute_start_y;  /* ordinal6 */
4477 	uint32_t compute_start_z;  /* ordinal7 */
4478 	uint32_t compute_num_thread_x;  /* ordinal8 */
4479 	uint32_t compute_num_thread_y;  /* ordinal9 */
4480 	uint32_t compute_num_thread_z;  /* ordinal10 */
4481 	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4482 	uint32_t compute_perfcount_enable;  /* ordinal12 */
4483 	uint32_t compute_pgm_lo;  /* ordinal13 */
4484 	uint32_t compute_pgm_hi;  /* ordinal14 */
4485 	uint32_t compute_tba_lo;  /* ordinal15 */
4486 	uint32_t compute_tba_hi;  /* ordinal16 */
4487 	uint32_t compute_tma_lo;  /* ordinal17 */
4488 	uint32_t compute_tma_hi;  /* ordinal18 */
4489 	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4490 	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4491 	uint32_t compute_vmid;  /* ordinal21 */
4492 	uint32_t compute_resource_limits;  /* ordinal22 */
4493 	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4494 	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4495 	uint32_t compute_tmpring_size;  /* ordinal25 */
4496 	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4497 	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4498 	uint32_t compute_restart_x;  /* ordinal28 */
4499 	uint32_t compute_restart_y;  /* ordinal29 */
4500 	uint32_t compute_restart_z;  /* ordinal30 */
4501 	uint32_t compute_thread_trace_enable;  /* ordinal31 */
4502 	uint32_t compute_misc_reserved;  /* ordinal32 */
4503 	uint32_t compute_dispatch_id;  /* ordinal33 */
4504 	uint32_t compute_threadgroup_id;  /* ordinal34 */
4505 	uint32_t compute_relaunch;  /* ordinal35 */
4506 	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4507 	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4508 	uint32_t compute_wave_restore_control;  /* ordinal38 */
4509 	uint32_t reserved9;  /* ordinal39 */
4510 	uint32_t reserved10;  /* ordinal40 */
4511 	uint32_t reserved11;  /* ordinal41 */
4512 	uint32_t reserved12;  /* ordinal42 */
4513 	uint32_t reserved13;  /* ordinal43 */
4514 	uint32_t reserved14;  /* ordinal44 */
4515 	uint32_t reserved15;  /* ordinal45 */
4516 	uint32_t reserved16;  /* ordinal46 */
4517 	uint32_t reserved17;  /* ordinal47 */
4518 	uint32_t reserved18;  /* ordinal48 */
4519 	uint32_t reserved19;  /* ordinal49 */
4520 	uint32_t reserved20;  /* ordinal50 */
4521 	uint32_t reserved21;  /* ordinal51 */
4522 	uint32_t reserved22;  /* ordinal52 */
4523 	uint32_t reserved23;  /* ordinal53 */
4524 	uint32_t reserved24;  /* ordinal54 */
4525 	uint32_t reserved25;  /* ordinal55 */
4526 	uint32_t reserved26;  /* ordinal56 */
4527 	uint32_t reserved27;  /* ordinal57 */
4528 	uint32_t reserved28;  /* ordinal58 */
4529 	uint32_t reserved29;  /* ordinal59 */
4530 	uint32_t reserved30;  /* ordinal60 */
4531 	uint32_t reserved31;  /* ordinal61 */
4532 	uint32_t reserved32;  /* ordinal62 */
4533 	uint32_t reserved33;  /* ordinal63 */
4534 	uint32_t reserved34;  /* ordinal64 */
4535 	uint32_t compute_user_data_0;  /* ordinal65 */
4536 	uint32_t compute_user_data_1;  /* ordinal66 */
4537 	uint32_t compute_user_data_2;  /* ordinal67 */
4538 	uint32_t compute_user_data_3;  /* ordinal68 */
4539 	uint32_t compute_user_data_4;  /* ordinal69 */
4540 	uint32_t compute_user_data_5;  /* ordinal70 */
4541 	uint32_t compute_user_data_6;  /* ordinal71 */
4542 	uint32_t compute_user_data_7;  /* ordinal72 */
4543 	uint32_t compute_user_data_8;  /* ordinal73 */
4544 	uint32_t compute_user_data_9;  /* ordinal74 */
4545 	uint32_t compute_user_data_10;  /* ordinal75 */
4546 	uint32_t compute_user_data_11;  /* ordinal76 */
4547 	uint32_t compute_user_data_12;  /* ordinal77 */
4548 	uint32_t compute_user_data_13;  /* ordinal78 */
4549 	uint32_t compute_user_data_14;  /* ordinal79 */
4550 	uint32_t compute_user_data_15;  /* ordinal80 */
4551 	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4552 	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4553 	uint32_t reserved35;  /* ordinal83 */
4554 	uint32_t reserved36;  /* ordinal84 */
4555 	uint32_t reserved37;  /* ordinal85 */
4556 	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4557 	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4558 	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4559 	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4560 	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4561 	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4562 	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4563 	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4564 	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4565 	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4566 	uint32_t reserved38;  /* ordinal96 */
4567 	uint32_t reserved39;  /* ordinal97 */
4568 	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4569 	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4570 	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4571 	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4572 	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4573 	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4574 	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4575 	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4576 	uint32_t reserved40;  /* ordinal106 */
4577 	uint32_t reserved41;  /* ordinal107 */
4578 	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4579 	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4580 	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4581 	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4582 	uint32_t reserved42;  /* ordinal112 */
4583 	uint32_t reserved43;  /* ordinal113 */
4584 	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4585 	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4586 	uint32_t cp_packet_id_lo;  /* ordinal116 */
4587 	uint32_t cp_packet_id_hi;  /* ordinal117 */
4588 	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4589 	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4590 	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4591 	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4592 	uint32_t gds_save_mask_lo;  /* ordinal122 */
4593 	uint32_t gds_save_mask_hi;  /* ordinal123 */
4594 	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4595 	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4596 	uint32_t reserved44;  /* ordinal126 */
4597 	uint32_t reserved45;  /* ordinal127 */
4598 	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4599 	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4600 	uint32_t cp_hqd_active;  /* ordinal130 */
4601 	uint32_t cp_hqd_vmid;  /* ordinal131 */
4602 	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4603 	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4604 	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4605 	uint32_t cp_hqd_quantum;  /* ordinal135 */
4606 	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4607 	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4608 	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4609 	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4610 	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4611 	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4612 	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4613 	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4614 	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4615 	uint32_t cp_hqd_pq_control;  /* ordinal145 */
4616 	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4617 	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4618 	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4619 	uint32_t cp_hqd_ib_control;  /* ordinal149 */
4620 	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4621 	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4622 	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4623 	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4624 	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4625 	uint32_t cp_hqd_msg_type;  /* ordinal155 */
4626 	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4627 	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4628 	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4629 	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4630 	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4631 	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4632 	uint32_t cp_mqd_control;  /* ordinal162 */
4633 	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4634 	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4635 	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4636 	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4637 	uint32_t cp_hqd_eop_control;  /* ordinal167 */
4638 	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4639 	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4640 	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4641 	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4642 	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4643 	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4644 	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4645 	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4646 	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4647 	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4648 	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4649 	uint32_t cp_hqd_error;  /* ordinal179 */
4650 	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4651 	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4652 	uint32_t reserved46;  /* ordinal182 */
4653 	uint32_t reserved47;  /* ordinal183 */
4654 	uint32_t reserved48;  /* ordinal184 */
4655 	uint32_t reserved49;  /* ordinal185 */
4656 	uint32_t reserved50;  /* ordinal186 */
4657 	uint32_t reserved51;  /* ordinal187 */
4658 	uint32_t reserved52;  /* ordinal188 */
4659 	uint32_t reserved53;  /* ordinal189 */
4660 	uint32_t reserved54;  /* ordinal190 */
4661 	uint32_t reserved55;  /* ordinal191 */
4662 	uint32_t iqtimer_pkt_header;  /* ordinal192 */
4663 	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4664 	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4665 	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4666 	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4667 	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4668 	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4669 	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4670 	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4671 	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4672 	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4673 	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4674 	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4675 	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4676 	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4677 	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4678 	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4679 	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4680 	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4681 	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4682 	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4683 	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4684 	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4685 	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4686 	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4687 	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4688 	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4689 	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4690 	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4691 	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4692 	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4693 	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4694 	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4695 	uint32_t reserved56;  /* ordinal225 */
4696 	uint32_t reserved57;  /* ordinal226 */
4697 	uint32_t reserved58;  /* ordinal227 */
4698 	uint32_t set_resources_header;  /* ordinal228 */
4699 	uint32_t set_resources_dw1;  /* ordinal229 */
4700 	uint32_t set_resources_dw2;  /* ordinal230 */
4701 	uint32_t set_resources_dw3;  /* ordinal231 */
4702 	uint32_t set_resources_dw4;  /* ordinal232 */
4703 	uint32_t set_resources_dw5;  /* ordinal233 */
4704 	uint32_t set_resources_dw6;  /* ordinal234 */
4705 	uint32_t set_resources_dw7;  /* ordinal235 */
4706 	uint32_t reserved59;  /* ordinal236 */
4707 	uint32_t reserved60;  /* ordinal237 */
4708 	uint32_t reserved61;  /* ordinal238 */
4709 	uint32_t reserved62;  /* ordinal239 */
4710 	uint32_t reserved63;  /* ordinal240 */
4711 	uint32_t reserved64;  /* ordinal241 */
4712 	uint32_t reserved65;  /* ordinal242 */
4713 	uint32_t reserved66;  /* ordinal243 */
4714 	uint32_t reserved67;  /* ordinal244 */
4715 	uint32_t reserved68;  /* ordinal245 */
4716 	uint32_t reserved69;  /* ordinal246 */
4717 	uint32_t reserved70;  /* ordinal247 */
4718 	uint32_t reserved71;  /* ordinal248 */
4719 	uint32_t reserved72;  /* ordinal249 */
4720 	uint32_t reserved73;  /* ordinal250 */
4721 	uint32_t reserved74;  /* ordinal251 */
4722 	uint32_t reserved75;  /* ordinal252 */
4723 	uint32_t reserved76;  /* ordinal253 */
4724 	uint32_t reserved77;  /* ordinal254 */
4725 	uint32_t reserved78;  /* ordinal255 */
4726 
4727 	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4728 };
4729 
4730 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4731 {
4732 	int i, r;
4733 
4734 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4735 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4736 
4737 		if (ring->mqd_obj) {
4738 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4739 			if (unlikely(r != 0))
4740 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4741 
4742 			amdgpu_bo_unpin(ring->mqd_obj);
4743 			amdgpu_bo_unreserve(ring->mqd_obj);
4744 
4745 			amdgpu_bo_unref(&ring->mqd_obj);
4746 			ring->mqd_obj = NULL;
4747 		}
4748 	}
4749 }
4750 
/**
 * gfx_v8_0_cp_compute_resume - bring up the compute (MEC) queues
 *
 * @adev: amdgpu device pointer
 *
 * First programs the EOP buffer address/size for every MEC pipe, then, for
 * each compute ring, allocates (if needed), maps and fills an MQD, programs
 * the matching CP_HQD_* registers and activates the queue.  Finally enables
 * the MEC and ring-tests every compute ring.
 *
 * Returns 0 on success or a negative error code; on failure the MQD buffer
 * objects are torn down via gfx_v8_0_cp_compute_fini().
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0..3 live on ME 1, the remaining pipes on ME 2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* per-pipe slice of the shared EOP buffer, in 256-byte units */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	/* restore the default SRBM bank */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily allocate the MQD buffer object in GTT */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all SEs/CUs for static thread management */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* carry over the per-pipe EOP setup done above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait (up to usec_timeout us) for the dequeue */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				/* program the MEC doorbell aperture */
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			/* enable the GENERIC2 interrupt on ME1 pipe 3 */
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		/* globally enable doorbells for the CP */
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test every compute ring; failures only mark it not ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
5009 
5010 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5011 {
5012 	int r;
5013 
5014 	if (!(adev->flags & AMD_IS_APU))
5015 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5016 
5017 	if (!adev->pp_enabled) {
5018 		if (!adev->firmware.smu_load) {
5019 			/* legacy firmware loading */
5020 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
5021 			if (r)
5022 				return r;
5023 
5024 			r = gfx_v8_0_cp_compute_load_microcode(adev);
5025 			if (r)
5026 				return r;
5027 		} else {
5028 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5029 							AMDGPU_UCODE_ID_CP_CE);
5030 			if (r)
5031 				return -EINVAL;
5032 
5033 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5034 							AMDGPU_UCODE_ID_CP_PFP);
5035 			if (r)
5036 				return -EINVAL;
5037 
5038 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5039 							AMDGPU_UCODE_ID_CP_ME);
5040 			if (r)
5041 				return -EINVAL;
5042 
5043 			if (adev->asic_type == CHIP_TOPAZ) {
5044 				r = gfx_v8_0_cp_compute_load_microcode(adev);
5045 				if (r)
5046 					return r;
5047 			} else {
5048 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5049 										 AMDGPU_UCODE_ID_CP_MEC1);
5050 				if (r)
5051 					return -EINVAL;
5052 			}
5053 		}
5054 	}
5055 
5056 	r = gfx_v8_0_cp_gfx_resume(adev);
5057 	if (r)
5058 		return r;
5059 
5060 	r = gfx_v8_0_cp_compute_resume(adev);
5061 	if (r)
5062 		return r;
5063 
5064 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5065 
5066 	return 0;
5067 }
5068 
/* Enable or disable both the GFX and the compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5074 
/**
 * gfx_v8_0_hw_init - gfx v8 IP-block hw_init hook
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Programs golden registers and the fixed GPU configuration, then brings
 * up the RLC followed by the command processors.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC must be running before the CP is resumed */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5091 
/**
 * gfx_v8_0_hw_fini - gfx v8 IP-block hw_fini hook
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Releases the privileged-fault interrupts, halts the command processors
 * and the RLC, frees the compute MQDs and ungates GFX powergating
 * (mirroring the gating done in gfx_v8_0_late_init()).
 *
 * Returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* drop the privileged register/instruction fault interrupts */
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	/* stop the CP first, then the RLC */
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5107 
/* IP suspend hook: for gfx v8 this is identical to a full hw_fini. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}
5114 
/* IP resume hook: for gfx v8 this is identical to a full hw_init. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}
5121 
5122 static bool gfx_v8_0_is_idle(void *handle)
5123 {
5124 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5125 
5126 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5127 		return false;
5128 	else
5129 		return true;
5130 }
5131 
5132 static int gfx_v8_0_wait_for_idle(void *handle)
5133 {
5134 	unsigned i;
5135 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5136 
5137 	for (i = 0; i < adev->usec_timeout; i++) {
5138 		if (gfx_v8_0_is_idle(handle))
5139 			return 0;
5140 
5141 		udelay(1);
5142 	}
5143 	return -ETIMEDOUT;
5144 }
5145 
/*
 * Decode GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS into the soft-reset masks
 * that the later pre/soft/post reset hooks consume.  The computed masks
 * are cached in adev->gfx.{grbm,srbm}_soft_reset.
 *
 * Returns true when any reset bit is needed, false when the engine is
 * clean (in which case both cached masks are cleared).
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	/* any busy graphics block implies a CP + GFX (+GRBM) reset */
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP micro engine (fetcher/compute/gfx) */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5207 
/*
 * Deactivate @ring's hardware queue: select its ME/pipe/queue bank and,
 * if the HQD is active, issue a type-2 dequeue request and busy-wait up
 * to adev->usec_timeout microseconds for CP_HQD_ACTIVE to clear.
 *
 * NOTE(review): this banks registers via vi_srbm_select() but neither
 * takes srbm_mutex nor restores the default (0,0,0,0) bank before
 * returning - other vi_srbm_select() users in this file do both; the
 * caller is presumably expected to handle it, verify at the call sites.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}
5227 
5228 static int gfx_v8_0_pre_soft_reset(void *handle)
5229 {
5230 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5231 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5232 
5233 	if ((!adev->gfx.grbm_soft_reset) &&
5234 	    (!adev->gfx.srbm_soft_reset))
5235 		return 0;
5236 
5237 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5238 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5239 
5240 	/* stop the rlc */
5241 	gfx_v8_0_rlc_stop(adev);
5242 
5243 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5244 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5245 		/* Disable GFX parsing/prefetching */
5246 		gfx_v8_0_cp_gfx_enable(adev, false);
5247 
5248 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5249 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5250 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5251 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5252 		int i;
5253 
5254 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5255 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5256 
5257 			gfx_v8_0_inactive_hqd(adev, ring);
5258 		}
5259 		/* Disable MEC parsing/prefetching */
5260 		gfx_v8_0_cp_compute_enable(adev, false);
5261 	}
5262 
5263        return 0;
5264 }
5265 
/**
 * gfx_v8_0_soft_reset - pulse the pending GRBM/SRBM soft-reset bits
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Using the masks cached by gfx_v8_0_check_soft_reset(), stalls/clears
 * GFX via GMCON_DEBUG, pulses the reset bits in GRBM_SOFT_RESET and/or
 * SRBM_SOFT_RESET (set, wait 50us, clear), then releases the stall.
 *
 * Returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stall/clear GFX in GMCON for the duration of the reset */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back to post the write before the delay */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		/* read back to post the write before the delay */
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		/* release the GMCON stall/clear set above */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5327 
/*
 * Clear @ring's HQD state after a reset: zero the dequeue request and the
 * PQ read/write pointers, then restore the default SRBM bank.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5337 
/**
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after a soft reset
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Mirrors gfx_v8_0_pre_soft_reset(): resumes the GFX CP when a CP/GFX
 * reset was performed, re-initializes every compute HQD and resumes the
 * MEC when a CP/CPF/CPC/CPG reset was performed, then restarts the RLC.
 *
 * Returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			/* gfx_v8_0_init_hqd() restores the SRBM bank itself */
			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5371 
5372 /**
5373  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5374  *
5375  * @adev: amdgpu_device pointer
5376  *
5377  * Fetches a GPU clock counter snapshot.
5378  * Returns the 64 bit clock counter snapshot.
5379  */
5380 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5381 {
5382 	uint64_t clock;
5383 
5384 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5385 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5386 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5387 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5388 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5389 	return clock;
5390 }
5391 
/*
 * Emit four WRITE_DATA packets that program the GDS base/size, GWS and OA
 * allocations for @vmid on this ring.  All byte quantities are converted
 * to the hardware's register units via the AMDGPU_*_SHIFT constants first.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base share one register, size in the upper field */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: encoded as a contiguous bitmask of oa_size bits from oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5439 
/* GFX helper callbacks exported to the rest of the driver via adev->gfx.funcs */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};
5444 
/*
 * IP early_init hook: record the fixed ring counts and install the gfx v8
 * callback tables (gfx funcs, ring funcs, irq funcs, GDS info, RLC funcs).
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5459 
/*
 * IP late_init hook: enable the privileged register/instruction fault
 * interrupts, run the EDC GPR workarounds (which need the IB pool, hence
 * late init), then gate GFX powergating.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	/* ungated again in gfx_v8_0_hw_fini() */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5483 
/*
 * Enable/disable static per-CU power gating via RLC_PG_CNTL.
 * On Polaris11 the SMC is additionally notified through powerplay first.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if (adev->asic_type == CHIP_POLARIS11)
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5496 
/* Enable/disable dynamic per-CU power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5502 
/* Enable/disable quick power gating (Polaris11) via RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5508 
/* Enable/disable GFX coarse-grain power gating via RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5514 
/* Enable/disable GFX pipeline power gating via RLC_PG_CNTL. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5524 
5525 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5526 					  bool enable)
5527 {
5528 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5529 		cz_enable_gfx_cg_power_gating(adev, true);
5530 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5531 			cz_enable_gfx_pipeline_power_gating(adev, true);
5532 	} else {
5533 		cz_enable_gfx_cg_power_gating(adev, false);
5534 		cz_enable_gfx_pipeline_power_gating(adev, false);
5535 	}
5536 }
5537 
5538 static int gfx_v8_0_set_powergating_state(void *handle,
5539 					  enum amd_powergating_state state)
5540 {
5541 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5542 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5543 
5544 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5545 		return 0;
5546 
5547 	switch (adev->asic_type) {
5548 	case CHIP_CARRIZO:
5549 	case CHIP_STONEY:
5550 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5551 			cz_update_gfx_cg_power_gating(adev, enable);
5552 
5553 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5554 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5555 		else
5556 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5557 
5558 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5559 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5560 		else
5561 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5562 		break;
5563 	case CHIP_POLARIS11:
5564 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5565 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5566 		else
5567 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5568 
5569 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5570 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5571 		else
5572 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5573 
5574 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5575 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5576 		else
5577 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5578 		break;
5579 	default:
5580 		break;
5581 	}
5582 
5583 	return 0;
5584 }
5585 
/*
 * Broadcast a BPM command to the RLC SERDES units.
 * @reg_addr: BPM register to address (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd: command value placed in the BPM_DATA field (SET_/CLE_BPM_SERDES_CMD)
 *
 * Selects all SEs/SHs and all CU/non-CU masters, then performs a
 * read-modify-write of RLC_SERDES_WR_CTRL to issue the command.
 * NOTE(review): the Stoney clear-mask omits BPM_DATA and REG_ADDR, so any
 * stale bits in those fields survive the RMW there — presumably intentional
 * for that ASIC; confirm against the register spec.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every shader engine / shader array */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* address all BPM units (0xff) and encode command + register */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5626 
/* RLC safe-mode handshake via RLC_GPR_REG2 (used by the CZ/Stoney paths
 * below): bit 0 is the request bit, bits 4:1 carry the message code.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5633 
/*
 * Put the RLC into safe mode on Carrizo-family parts by sending the
 * ENTER message through RLC_GPR_REG2, then waiting (bounded by
 * adev->usec_timeout) for GFX clocks/power to report up and for the
 * request bit to be acknowledged.  No-op when the RLC F32 core is not
 * running or no CG/PG feature requiring safe mode is enabled.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* RLC firmware not running: safe mode is meaningless */
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* raise REQ and encode the ENTER message */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for both GFX clock and power status to come up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait until the RLC clears the request bit (ack) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5669 
/*
 * Leave RLC safe mode on Carrizo-family parts by sending the EXIT message
 * through RLC_GPR_REG2, then waiting for the request bit to clear.
 * NOTE(review): the ack wait runs even when no message was sent (flags not
 * matched); it is then a bounded poll of an already-clear bit.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* RLC firmware not running: nothing to exit */
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* raise REQ and encode the EXIT message */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait until the RLC clears the request bit (ack) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
5695 
/*
 * Put the RLC into safe mode on Iceland-style parts using the dedicated
 * RLC_SAFE_MODE register (CMD bit + MESSAGE field), then wait for GFX
 * clock/power status and for the command bit to be acknowledged.
 * No-op when the RLC F32 core is not running or neither CGCG nor MGCG
 * is enabled.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* RLC firmware not running: safe mode is meaningless */
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1 with MESSAGE=1 requests safe mode entry */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for both GFX clock and power status to come up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait until the RLC clears the command bit (ack) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5729 
/*
 * Leave RLC safe mode on Iceland-style parts: writing CMD with a zero
 * MESSAGE field requests exit.  Only issued when entry was recorded in
 * adev->gfx.rlc.in_safe_mode; the final poll waits for the CMD ack.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* RLC firmware not running: nothing to exit */
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1 with MESSAGE=0 requests safe mode exit */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait until the RLC clears the command bit (ack) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5754 
/* For ASICs that need no safe-mode handshake: only track the flag. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5759 
/* For ASICs that need no safe-mode handshake: only clear the flag. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5764 
/* Safe-mode hooks for Carrizo/Stoney (RLC_GPR_REG2 message interface). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5769 
/* Safe-mode hooks using the RLC_SAFE_MODE register interface. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5774 
/* Safe-mode hooks for ASICs where no RLC handshake is required. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5779 
/*
 * Enable or disable medium grain clock gating (MGCG), the related memory
 * light-sleep modes (RLC/CP MGLS) and CGTS tree-shade gating.  The whole
 * register sequence is bracketed by RLC safe mode because the MGCG
 * override and SERDES commands must not race the RLC.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		/* skip the write when nothing changed */
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* tree-shade light sleep only with both MGLS and CGTS_LS */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5883 
/*
 * Enable or disable coarse grain clock gating (CGCG) and, when supported,
 * coarse grain light sleep (CGLS).  Run inside RLC safe mode; the override
 * register, SERDES commands and RLC_CGCG_CGLS_CTRL are programmed in a
 * fixed order required by the hardware.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		/* skip the write when nothing changed */
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5974 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5975 					    bool enable)
5976 {
5977 	if (enable) {
5978 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5979 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5980 		 */
5981 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5982 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5983 	} else {
5984 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5985 		 * ===  CGCG + CGLS ===
5986 		 */
5987 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5988 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5989 	}
5990 	return 0;
5991 }
5992 
5993 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5994 					  enum amd_clockgating_state state)
5995 {
5996 	uint32_t msg_id, pp_state;
5997 	void *pp_handle = adev->powerplay.pp_handle;
5998 
5999 	if (state == AMD_CG_STATE_UNGATE)
6000 		pp_state = 0;
6001 	else
6002 		pp_state = PP_STATE_CG | PP_STATE_LS;
6003 
6004 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6005 			PP_BLOCK_GFX_CG,
6006 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6007 			pp_state);
6008 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6009 
6010 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6011 			PP_BLOCK_GFX_MG,
6012 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6013 			pp_state);
6014 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6015 
6016 	return 0;
6017 }
6018 
6019 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6020 					  enum amd_clockgating_state state)
6021 {
6022 	uint32_t msg_id, pp_state;
6023 	void *pp_handle = adev->powerplay.pp_handle;
6024 
6025 	if (state == AMD_CG_STATE_UNGATE)
6026 		pp_state = 0;
6027 	else
6028 		pp_state = PP_STATE_CG | PP_STATE_LS;
6029 
6030 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6031 			PP_BLOCK_GFX_CG,
6032 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6033 			pp_state);
6034 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6035 
6036 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6037 			PP_BLOCK_GFX_3D,
6038 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6039 			pp_state);
6040 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6041 
6042 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6043 			PP_BLOCK_GFX_MG,
6044 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6045 			pp_state);
6046 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6047 
6048 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6049 			PP_BLOCK_GFX_RLC,
6050 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6051 			pp_state);
6052 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6053 
6054 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6055 			PP_BLOCK_GFX_CP,
6056 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6057 			pp_state);
6058 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6059 
6060 	return 0;
6061 }
6062 
6063 static int gfx_v8_0_set_clockgating_state(void *handle,
6064 					  enum amd_clockgating_state state)
6065 {
6066 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6067 
6068 	switch (adev->asic_type) {
6069 	case CHIP_FIJI:
6070 	case CHIP_CARRIZO:
6071 	case CHIP_STONEY:
6072 		gfx_v8_0_update_gfx_clock_gating(adev,
6073 						 state == AMD_CG_STATE_GATE ? true : false);
6074 		break;
6075 	case CHIP_TONGA:
6076 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6077 		break;
6078 	case CHIP_POLARIS10:
6079 	case CHIP_POLARIS11:
6080 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6081 		break;
6082 	default:
6083 		break;
6084 	}
6085 	return 0;
6086 }
6087 
/* Fetch the ring's read pointer from its write-back slot in system memory. */
static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
6092 
6093 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6094 {
6095 	struct amdgpu_device *adev = ring->adev;
6096 
6097 	if (ring->use_doorbell)
6098 		/* XXX check if swapping is necessary on BE */
6099 		return ring->adev->wb.wb[ring->wptr_offs];
6100 	else
6101 		return RREG32(mmCP_RB0_WPTR);
6102 }
6103 
6104 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6105 {
6106 	struct amdgpu_device *adev = ring->adev;
6107 
6108 	if (ring->use_doorbell) {
6109 		/* XXX check if swapping is necessary on BE */
6110 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
6111 		WDOORBELL32(ring->doorbell_index, ring->wptr);
6112 	} else {
6113 		WREG32(mmCP_RB0_WPTR, ring->wptr);
6114 		(void)RREG32(mmCP_RB0_WPTR);
6115 	}
6116 }
6117 
/*
 * Emit an HDP flush as a WAIT_REG_MEM packet: the CP writes the flush
 * request and polls GPU_HDP_FLUSH_DONE until this ring's engine bit
 * (selected via ref_and_mask) matches.  Returns without emitting for
 * compute rings on an unexpected ME.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* pick the CPn bit for this MEC/pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6149 
/*
 * Emit an HDP read-cache invalidate: a WRITE_DATA packet storing 1 to
 * HDP_DEBUG0 (write triggers the invalidate), with write confirmation.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6161 
/*
 * Emit an indirect buffer on the GFX ring.  CE IBs use the
 * INDIRECT_BUFFER_CONST opcode, DE IBs the plain INDIRECT_BUFFER opcode.
 * @vm_id: VM context the IB executes in (packed into bits 31:24 of the
 *         control dword alongside the IB length).
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* byte-swap control for big-endian hosts */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC)); /* IB base, 4-byte aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6184 
/*
 * Emit an indirect buffer on a compute ring: always a plain
 * INDIRECT_BUFFER packet with the VALID bit, IB length and VM id packed
 * into the control dword.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				/* byte-swap control for big-endian hosts */
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC)); /* IB base, 4-byte aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6200 
/*
 * Emit a fence on the GFX ring as an EVENT_WRITE_EOP packet: flushes
 * L1/TC caches, writes @seq to @addr (32- or 64-bit per flags) and
 * optionally raises an interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6221 
/*
 * Emit a pipeline sync: a WAIT_REG_MEM packet that stalls the CP (PFP on
 * the GFX ring, ME on compute) until the ring's fence memory reaches the
 * latest emitted sequence number.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* NOTE(review): the 0xffffffff mask on the upper half is a no-op */
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6238 
/*
 * Emit a VM TLB flush for @vm_id: write the new page-directory base to the
 * matching VM_CONTEXT*_PAGE_TABLE_BASE_ADDR register, request invalidation
 * of that context, wait for it to complete, and on the GFX ring re-sync
 * PFP with ME afterwards.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
	if (usepfp)
		amdgpu_ring_insert_nop(ring, 128);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12); /* PD base is stored as a page frame number */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6291 
/* Compute rings always use the write-back slot for their write pointer. */
static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
6296 
/*
 * Publish a compute ring's write pointer: update the write-back slot and
 * ring the doorbell (compute rings always use doorbells).
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
6305 
/*
 * Emit a fence on a compute ring as a RELEASE_MEM packet: flushes L1/TC
 * caches, writes @seq to @addr (32- or 64-bit per flags) and optionally
 * raises an interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6326 
/* Emit a SWITCH_BUFFER packet (flips the CE/DE double buffer). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6332 
/*
 * Emit a CONTEXT_CONTROL packet selecting which state groups the CP
 * should (re)load.  Bit layout of dw2 follows the PACKET3_CONTEXT_CONTROL
 * definition; CE RAM is loaded on a context switch with a preamble, or on
 * the very first preamble even without a context switch.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6361 
/* Worst-case dword count emitted by gfx_v8_0_ring_emit_ib_gfx(). */
static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
{
	return 4; /* gfx_v8_0_ring_emit_ib_gfx */
}
6367 
/*
 * Worst-case number of ring dwords one GFX submission frame may need,
 * summed per emit helper; used to reserve ring space up front.
 */
static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3; /* gfx_v8_ring_emit_cntxcntl */
}
6380 
/* Worst-case dword count emitted by gfx_v8_0_ring_emit_ib_compute(). */
static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
{
	return 4; /* gfx_v8_0_ring_emit_ib_compute */
}
6386 
/*
 * Worst-case number of ring dwords one compute submission frame may need,
 * summed per emit helper; used to reserve ring space up front.
 */
static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
}
6397 
6398 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6399 						 enum amdgpu_interrupt_state state)
6400 {
6401 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6402 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6403 }
6404 
6405 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6406 						     int me, int pipe,
6407 						     enum amdgpu_interrupt_state state)
6408 {
6409 	/*
6410 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6411 	 * handles the setting of interrupts for this specific pipe. All other
6412 	 * pipes' interrupts are set by amdkfd.
6413 	 */
6414 
6415 	if (me == 1) {
6416 		switch (pipe) {
6417 		case 0:
6418 			break;
6419 		default:
6420 			DRM_DEBUG("invalid pipe %d\n", pipe);
6421 			return;
6422 		}
6423 	} else {
6424 		DRM_DEBUG("invalid me %d\n", me);
6425 		return;
6426 	}
6427 
6428 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6429 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6430 }
6431 
6432 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6433 					     struct amdgpu_irq_src *source,
6434 					     unsigned type,
6435 					     enum amdgpu_interrupt_state state)
6436 {
6437 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6438 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6439 
6440 	return 0;
6441 }
6442 
6443 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6444 					      struct amdgpu_irq_src *source,
6445 					      unsigned type,
6446 					      enum amdgpu_interrupt_state state)
6447 {
6448 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6449 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6450 
6451 	return 0;
6452 }
6453 
/*
 * Dispatch an EOP interrupt enable/disable request to the matching engine:
 * the single GFX ring, or one of the eight MEC compute pipes (2 MEs x 4
 * pipes).  Unknown types are silently ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6492 
6493 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6494 			    struct amdgpu_irq_src *source,
6495 			    struct amdgpu_iv_entry *entry)
6496 {
6497 	int i;
6498 	u8 me_id, pipe_id, queue_id;
6499 	struct amdgpu_ring *ring;
6500 
6501 	DRM_DEBUG("IH: CP EOP\n");
6502 	me_id = (entry->ring_id & 0x0c) >> 2;
6503 	pipe_id = (entry->ring_id & 0x03) >> 0;
6504 	queue_id = (entry->ring_id & 0x70) >> 4;
6505 
6506 	switch (me_id) {
6507 	case 0:
6508 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6509 		break;
6510 	case 1:
6511 	case 2:
6512 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6513 			ring = &adev->gfx.compute_ring[i];
6514 			/* Per-queue interrupt is supported for MEC starting from VI.
6515 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6516 			  */
6517 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6518 				amdgpu_fence_process(ring);
6519 		}
6520 		break;
6521 	}
6522 	return 0;
6523 }
6524 
/*
 * A command stream accessed a privileged register: log the violation and
 * schedule adev->reset_work so the GPU reset runs from process context
 * rather than in the interrupt path.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6533 
/*
 * A command stream contained a privileged/illegal instruction: log it and
 * schedule adev->reset_work to recover via GPU reset from process context.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6542 
/* IP-block lifecycle callbacks for the GFX 8.0 block (init/fini, power,
 * clockgating and soft-reset hooks invoked by the amdgpu IP framework). */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6562 
/* Ring callbacks for the graphics (GFX) ring; no CS parser is needed
 * (parse_cs = NULL), and gfx-only hooks such as switch-buffer and
 * CONTEXT_CONTROL emission are provided here but not on compute rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
};
6584 
/* Ring callbacks for the MEC compute rings; shares most emit helpers with
 * the gfx ring but uses compute-specific rptr/wptr, IB and fence emission. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
};
6604 
6605 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6606 {
6607 	int i;
6608 
6609 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6610 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6611 
6612 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6613 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6614 }
6615 
/* End-of-pipe interrupt source: state setter + IH processing callback. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6620 
/* Privileged register fault interrupt source callbacks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6625 
/* Privileged/illegal instruction fault interrupt source callbacks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6630 
/* Wire up the GFX interrupt sources used by this IP block. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one EOP interrupt type per entry in the AMDGPU_CP_IRQ_* enum */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6642 
6643 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6644 {
6645 	switch (adev->asic_type) {
6646 	case CHIP_TOPAZ:
6647 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6648 		break;
6649 	case CHIP_STONEY:
6650 	case CHIP_CARRIZO:
6651 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6652 		break;
6653 	default:
6654 		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6655 		break;
6656 	}
6657 }
6658 
/*
 * Initialize GDS (global data share), GWS and OA sizes and their
 * per-client partition sizes, based on the total GDS memory reported
 * by the hardware in GDS_VMID0_SIZE.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	/* smaller 64KB GDS parts get proportionally smaller partitions */
	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6686 
6687 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6688 						 u32 bitmap)
6689 {
6690 	u32 data;
6691 
6692 	if (!bitmap)
6693 		return;
6694 
6695 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6696 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6697 
6698 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6699 }
6700 
6701 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6702 {
6703 	u32 data, mask;
6704 
6705 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6706 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6707 
6708 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6709 
6710 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6711 }
6712 
/*
 * Populate adev->gfx.cu_info: walk every shader engine / shader array,
 * apply the module-parameter CU-disable masks, record each SE/SH's
 * active-CU bitmap, and compute the total CU count plus the
 * always-on (AO) CU mask (at most 2 AO CUs per SH).
 *
 * Requires grbm_idx_mutex because it programs the GRBM SE/SH selector.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	/* one user disable mask per SE/SH; layout fixed at 4 SEs x 2 SHs */
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* select this SE/SH so the CU config registers apply to it */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first two become always-on */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast mode so later register writes hit all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6755