xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 23c2b932)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33 
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36 
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39 
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42 
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47 
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50 
51 #include "smu/smu_7_1_3_d.h"
52 
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55 
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60 
61 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70 
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77 
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81 
/* BPM register addresses, used with the SET/CLE BPM SERDES commands above. */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* sentinel: number of BPM registers */
};
91 
92 #define RLC_FormatDirectRegListLength        14
93 
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100 
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106 
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113 
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119 
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126 
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133 
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140 
/*
 * Per-VMID GDS register offsets, indexed by VMID (0-15).
 * Each entry holds the {base, size, GWS, OA} register offsets for one VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
160 
/*
 * Tonga A11 golden register settings, applied by
 * gfx_v8_0_init_golden_registers().  Flat triplets of
 * {register offset, AND mask, value} — presumably "keep bits outside mask,
 * OR in value"; confirm against amdgpu_program_register_sequence().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
179 
/* Tonga common golden registers ({reg, mask, value} triplets). */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
191 
/*
 * Tonga medium-grain / coarse-grain clockgating init sequence
 * ({reg, mask, value} triplets).  Note the table writes mmGRBM_GFX_INDEX
 * to broadcast mode before programming the CGTT/CGTS registers.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS programming; CU0/CU4 use the TA_SQC variant */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
270 
/* Polaris11 A11 golden register settings ({reg, mask, value} triplets). */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
290 
/* Polaris11 common golden registers ({reg, mask, value} triplets). */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
300 
/* Polaris10 A11 golden register settings ({reg, mask, value} triplets). */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
321 
/* Polaris10 common golden registers ({reg, mask, value} triplets). */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
333 
/* Fiji common golden registers ({reg, mask, value} triplets). */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
347 
/* Fiji A10 golden register settings ({reg, mask, value} triplets). */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
362 
/*
 * Fiji medium-grain / coarse-grain clockgating init sequence
 * ({reg, mask, value} triplets).
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
401 
/* Iceland (Topaz) A11 golden register settings ({reg, mask, value} triplets). */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
420 
/* Iceland (Topaz) common golden registers ({reg, mask, value} triplets). */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
432 
/*
 * Iceland (Topaz) medium-grain / coarse-grain clockgating init sequence
 * ({reg, mask, value} triplets).  Only CUs 0-5 are programmed here.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
500 
/* Carrizo A11 golden register settings ({reg, mask, value} triplets). */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
514 
/* Carrizo common golden registers ({reg, mask, value} triplets). */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
526 
/*
 * Carrizo medium-grain / coarse-grain clockgating init sequence
 * ({reg, mask, value} triplets).  CU0/CU4 use the TA_SQC variant.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
605 
/* Stoney A11 golden register settings ({reg, mask, value} triplets). */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
619 
/* Stoney common golden registers ({reg, mask, value} triplets). */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
631 
/* Stoney clockgating init sequence ({reg, mask, value} triplets). */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
641 
642 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
643 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
644 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
645 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
646 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
647 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
648 
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" register settings.
 * @adev: amdgpu device
 *
 * Selects the register tables for the detected ASIC and applies them via
 * amdgpu_program_register_sequence(): first the clockgating init sequence
 * (where one exists for the ASIC), then the golden settings, then the
 * common registers.  Table order is preserved deliberately.  Unknown ASIC
 * types are silently skipped.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* PCI revision 0xc7 gets two extra i2c transactions via
		 * atombios — presumably a board-specific fixup; confirm
		 * against the board vendor errata before touching. */
		if (adev->pdev->revision == 0xc7) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
733 
734 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
735 {
736 	int i;
737 
738 	adev->gfx.scratch.num_reg = 7;
739 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
740 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
741 		adev->gfx.scratch.free[i] = true;
742 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
743 	}
744 }
745 
/*
 * gfx_v8_0_ring_test_ring - basic liveness test for a CP ring
 * @ring: ring to test
 *
 * Seeds a scratch register with 0xCAFEDEAD, pushes a 3-dword
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to that register, and
 * then polls the register for up to adev->usec_timeout microseconds.
 *
 * Returns 0 when the write is observed, -EINVAL on timeout, or the
 * error from scratch-register or ring allocation.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* known "poison" value so we can tell whether the packet landed */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* ask the CP to overwrite the scratch register with 0xDEADBEEF */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP write to become visible */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
789 
790 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
791 {
792 	struct amdgpu_device *adev = ring->adev;
793 	struct amdgpu_ib ib;
794 	struct fence *f = NULL;
795 	uint32_t scratch;
796 	uint32_t tmp = 0;
797 	unsigned i;
798 	int r;
799 
800 	r = amdgpu_gfx_scratch_get(adev, &scratch);
801 	if (r) {
802 		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
803 		return r;
804 	}
805 	WREG32(scratch, 0xCAFEDEAD);
806 	memset(&ib, 0, sizeof(ib));
807 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
808 	if (r) {
809 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
810 		goto err1;
811 	}
812 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
813 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
814 	ib.ptr[2] = 0xDEADBEEF;
815 	ib.length_dw = 3;
816 
817 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
818 	if (r)
819 		goto err2;
820 
821 	r = fence_wait(f, false);
822 	if (r) {
823 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
824 		goto err2;
825 	}
826 	for (i = 0; i < adev->usec_timeout; i++) {
827 		tmp = RREG32(scratch);
828 		if (tmp == 0xDEADBEEF)
829 			break;
830 		DRM_UDELAY(1);
831 	}
832 	if (i < adev->usec_timeout) {
833 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
834 			 ring->idx, i);
835 		goto err2;
836 	} else {
837 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
838 			  scratch, tmp);
839 		r = -EINVAL;
840 	}
841 err2:
842 	fence_put(f);
843 	amdgpu_ib_free(adev, &ib, NULL);
844 	fence_put(f);
845 err1:
846 	amdgpu_gfx_scratch_free(adev, scratch);
847 	return r;
848 }
849 
850 
851 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
852 	release_firmware(adev->gfx.pfp_fw);
853 	adev->gfx.pfp_fw = NULL;
854 	release_firmware(adev->gfx.me_fw);
855 	adev->gfx.me_fw = NULL;
856 	release_firmware(adev->gfx.ce_fw);
857 	adev->gfx.ce_fw = NULL;
858 	release_firmware(adev->gfx.rlc_fw);
859 	adev->gfx.rlc_fw = NULL;
860 	release_firmware(adev->gfx.mec_fw);
861 	adev->gfx.mec_fw = NULL;
862 	if ((adev->asic_type != CHIP_STONEY) &&
863 	    (adev->asic_type != CHIP_TOPAZ))
864 		release_firmware(adev->gfx.mec2_fw);
865 	adev->gfx.mec2_fw = NULL;
866 
867 	kfree(adev->gfx.rlc.register_list_format);
868 }
869 
870 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
871 {
872 	const char *chip_name;
873 	char fw_name[30];
874 	int err;
875 	struct amdgpu_firmware_info *info = NULL;
876 	const struct common_firmware_header *header = NULL;
877 	const struct gfx_firmware_header_v1_0 *cp_hdr;
878 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
879 	unsigned int *tmp = NULL, i;
880 
881 	DRM_DEBUG("\n");
882 
883 	switch (adev->asic_type) {
884 	case CHIP_TOPAZ:
885 		chip_name = "topaz";
886 		break;
887 	case CHIP_TONGA:
888 		chip_name = "tonga";
889 		break;
890 	case CHIP_CARRIZO:
891 		chip_name = "carrizo";
892 		break;
893 	case CHIP_FIJI:
894 		chip_name = "fiji";
895 		break;
896 	case CHIP_POLARIS11:
897 		chip_name = "polaris11";
898 		break;
899 	case CHIP_POLARIS10:
900 		chip_name = "polaris10";
901 		break;
902 	case CHIP_STONEY:
903 		chip_name = "stoney";
904 		break;
905 	default:
906 		BUG();
907 	}
908 
909 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
910 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
911 	if (err)
912 		goto out;
913 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
914 	if (err)
915 		goto out;
916 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
917 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
918 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
919 
920 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
921 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
922 	if (err)
923 		goto out;
924 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
925 	if (err)
926 		goto out;
927 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
928 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
929 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
930 
931 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
932 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
933 	if (err)
934 		goto out;
935 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
936 	if (err)
937 		goto out;
938 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
939 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
940 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
941 
942 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
943 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
944 	if (err)
945 		goto out;
946 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
947 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
948 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
949 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
950 
951 	adev->gfx.rlc.save_and_restore_offset =
952 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
953 	adev->gfx.rlc.clear_state_descriptor_offset =
954 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
955 	adev->gfx.rlc.avail_scratch_ram_locations =
956 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
957 	adev->gfx.rlc.reg_restore_list_size =
958 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
959 	adev->gfx.rlc.reg_list_format_start =
960 			le32_to_cpu(rlc_hdr->reg_list_format_start);
961 	adev->gfx.rlc.reg_list_format_separate_start =
962 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
963 	adev->gfx.rlc.starting_offsets_start =
964 			le32_to_cpu(rlc_hdr->starting_offsets_start);
965 	adev->gfx.rlc.reg_list_format_size_bytes =
966 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
967 	adev->gfx.rlc.reg_list_size_bytes =
968 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
969 
970 	adev->gfx.rlc.register_list_format =
971 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
972 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
973 
974 	if (!adev->gfx.rlc.register_list_format) {
975 		err = -ENOMEM;
976 		goto out;
977 	}
978 
979 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
980 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
981 	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
982 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
983 
984 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
985 
986 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
987 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
988 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
989 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
990 
991 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
992 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
993 	if (err)
994 		goto out;
995 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
996 	if (err)
997 		goto out;
998 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
999 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1000 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1001 
1002 	if ((adev->asic_type != CHIP_STONEY) &&
1003 	    (adev->asic_type != CHIP_TOPAZ)) {
1004 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1005 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1006 		if (!err) {
1007 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1008 			if (err)
1009 				goto out;
1010 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1011 				adev->gfx.mec2_fw->data;
1012 			adev->gfx.mec2_fw_version =
1013 				le32_to_cpu(cp_hdr->header.ucode_version);
1014 			adev->gfx.mec2_feature_version =
1015 				le32_to_cpu(cp_hdr->ucode_feature_version);
1016 		} else {
1017 			err = 0;
1018 			adev->gfx.mec2_fw = NULL;
1019 		}
1020 	}
1021 
1022 	if (adev->firmware.smu_load) {
1023 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1024 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1025 		info->fw = adev->gfx.pfp_fw;
1026 		header = (const struct common_firmware_header *)info->fw->data;
1027 		adev->firmware.fw_size +=
1028 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1029 
1030 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1031 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1032 		info->fw = adev->gfx.me_fw;
1033 		header = (const struct common_firmware_header *)info->fw->data;
1034 		adev->firmware.fw_size +=
1035 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1036 
1037 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1038 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1039 		info->fw = adev->gfx.ce_fw;
1040 		header = (const struct common_firmware_header *)info->fw->data;
1041 		adev->firmware.fw_size +=
1042 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1043 
1044 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1045 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1046 		info->fw = adev->gfx.rlc_fw;
1047 		header = (const struct common_firmware_header *)info->fw->data;
1048 		adev->firmware.fw_size +=
1049 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050 
1051 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1052 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1053 		info->fw = adev->gfx.mec_fw;
1054 		header = (const struct common_firmware_header *)info->fw->data;
1055 		adev->firmware.fw_size +=
1056 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1057 
1058 		if (adev->gfx.mec2_fw) {
1059 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1060 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1061 			info->fw = adev->gfx.mec2_fw;
1062 			header = (const struct common_firmware_header *)info->fw->data;
1063 			adev->firmware.fw_size +=
1064 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065 		}
1066 
1067 	}
1068 
1069 out:
1070 	if (err) {
1071 		dev_err(adev->dev,
1072 			"gfx8: Failed to load firmware \"%s\"\n",
1073 			fw_name);
1074 		release_firmware(adev->gfx.pfp_fw);
1075 		adev->gfx.pfp_fw = NULL;
1076 		release_firmware(adev->gfx.me_fw);
1077 		adev->gfx.me_fw = NULL;
1078 		release_firmware(adev->gfx.ce_fw);
1079 		adev->gfx.ce_fw = NULL;
1080 		release_firmware(adev->gfx.rlc_fw);
1081 		adev->gfx.rlc_fw = NULL;
1082 		release_firmware(adev->gfx.mec_fw);
1083 		adev->gfx.mec_fw = NULL;
1084 		release_firmware(adev->gfx.mec2_fw);
1085 		adev->gfx.mec2_fw = NULL;
1086 	}
1087 	return err;
1088 }
1089 
/*
 * gfx_v8_0_get_csb_buffer - emit the RLC clear state buffer contents
 * @adev: amdgpu device
 * @buffer: destination buffer (little-endian dwords); sized by the
 *          caller via gfx_v8_0_get_csb_size() (see gfx_v8_0_rlc_init)
 *
 * Builds the PM4 stream replayed on clear state: preamble begin,
 * CONTEXT_CONTROL, the SECT_CONTEXT register extents from
 * adev->gfx.rlc.cs_data, a per-ASIC pair of raster-config dwords
 * starting at mmPA_SC_RASTER_CONFIG, preamble end and a final
 * CLEAR_STATE packet.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* copy every SECT_CONTEXT extent; any other section id means the
	 * table is not what we expect, so stop emitting */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* two raster-config dwords, values are per-ASIC golden settings */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1162 
1163 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1164 {
1165 	int r;
1166 
1167 	/* clear state block */
1168 	if (adev->gfx.rlc.clear_state_obj) {
1169 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1170 		if (unlikely(r != 0))
1171 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1172 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1173 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1174 
1175 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1176 		adev->gfx.rlc.clear_state_obj = NULL;
1177 	}
1178 }
1179 
/*
 * gfx_v8_0_rlc_init - allocate and fill the RLC clear state buffer
 * @adev: amdgpu device
 *
 * Creates (once) a CPU-accessible VRAM buffer object sized by
 * gfx_v8_0_get_csb_size(), pins it, records its GPU address, maps it
 * and writes the clear-state PM4 stream via gfx_v8_0_get_csb_buffer().
 * The BO is left pinned; only the CPU mapping and reservation are
 * released here.
 *
 * Returns 0 on success; on any failure the partial state is torn down
 * with gfx_v8_0_rlc_fini() and the error is returned.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		/* only allocate on the first call; re-init reuses the BO */
		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* pin in VRAM and remember the GPU address the RLC will use */
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	return 0;
}
1236 
1237 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1238 {
1239 	int r;
1240 
1241 	if (adev->gfx.mec.hpd_eop_obj) {
1242 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1243 		if (unlikely(r != 0))
1244 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1245 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1246 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1247 
1248 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1249 		adev->gfx.mec.hpd_eop_obj = NULL;
1250 	}
1251 }
1252 
1253 #define MEC_HPD_SIZE 2048
1254 
/*
 * gfx_v8_0_mec_init - allocate and clear the MEC HPD EOP buffer
 * @adev: amdgpu device
 *
 * Allocates (once) a GTT buffer object of
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes, pins it, records its GPU
 * address and zeroes it.  The buffer stays pinned after return.
 *
 * NOTE(review): the error messages say "HDP EOP" while the object is
 * the HPD EOP buffer — looks like a typo in the strings; confirm before
 * changing them, since they are user-visible log output.
 *
 * Returns 0 on success or a negative error code (partial state is torn
 * down with gfx_v8_0_mec_fini() on failure).
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	/* only allocate on the first call; re-init reuses the BO */
	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	/* pin in GTT and remember the GPU address */
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	/* zero the whole buffer before handing it to the MEC */
	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1306 
/* Raw machine-code dwords of the VGPR-init compute shader dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds(); copied verbatim into the IB at
 * vgpr_offset.  Presumably touches a wide range of VGPRs (per its name)
 * before the final barrier/end-of-program words.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1343 
/* Raw machine-code dwords of the SGPR-init compute shader dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds(); copied verbatim into the IB at
 * sgpr_offset and used for both the SGPR1 and SGPR2 dispatches.
 * Presumably touches a range of SGPRs (per its name).
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1368 
/* Register-offset/value pairs (consumed two at a time via SET_SH_REG)
 * programmed before dispatching the VGPR-init shader in
 * gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1388 
/* Register-offset/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Differs from sgpr2_init_regs only
 * in the mmCOMPUTE_STATIC_THREAD_MGMT_SE0 mask (0x0f vs 0xf0).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1408 
/* Register-offset/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Differs from sgpr1_init_regs only
 * in the mmCOMPUTE_STATIC_THREAD_MGMT_SE0 mask (0xf0 vs 0x0f).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1428 
/* EDC SEC/DED counter registers read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1457 
1458 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1459 {
1460 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1461 	struct amdgpu_ib ib;
1462 	struct fence *f = NULL;
1463 	int r, i;
1464 	u32 tmp;
1465 	unsigned total_size, vgpr_offset, sgpr_offset;
1466 	u64 gpu_addr;
1467 
1468 	/* only supported on CZ */
1469 	if (adev->asic_type != CHIP_CARRIZO)
1470 		return 0;
1471 
1472 	/* bail if the compute ring is not ready */
1473 	if (!ring->ready)
1474 		return 0;
1475 
1476 	tmp = RREG32(mmGB_EDC_MODE);
1477 	WREG32(mmGB_EDC_MODE, 0);
1478 
1479 	total_size =
1480 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1481 	total_size +=
1482 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1483 	total_size +=
1484 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1485 	total_size = ALIGN(total_size, 256);
1486 	vgpr_offset = total_size;
1487 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1488 	sgpr_offset = total_size;
1489 	total_size += sizeof(sgpr_init_compute_shader);
1490 
1491 	/* allocate an indirect buffer to put the commands in */
1492 	memset(&ib, 0, sizeof(ib));
1493 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1494 	if (r) {
1495 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1496 		return r;
1497 	}
1498 
1499 	/* load the compute shaders */
1500 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1501 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1502 
1503 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1504 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1505 
1506 	/* init the ib length to 0 */
1507 	ib.length_dw = 0;
1508 
1509 	/* VGPR */
1510 	/* write the register state for the compute dispatch */
1511 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1512 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1513 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1514 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1515 	}
1516 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1517 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1518 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1519 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1520 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1521 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1522 
1523 	/* write dispatch packet */
1524 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1525 	ib.ptr[ib.length_dw++] = 8; /* x */
1526 	ib.ptr[ib.length_dw++] = 1; /* y */
1527 	ib.ptr[ib.length_dw++] = 1; /* z */
1528 	ib.ptr[ib.length_dw++] =
1529 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1530 
1531 	/* write CS partial flush packet */
1532 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1533 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1534 
1535 	/* SGPR1 */
1536 	/* write the register state for the compute dispatch */
1537 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1538 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1539 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1540 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1541 	}
1542 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1543 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1544 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1545 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1546 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1547 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1548 
1549 	/* write dispatch packet */
1550 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1551 	ib.ptr[ib.length_dw++] = 8; /* x */
1552 	ib.ptr[ib.length_dw++] = 1; /* y */
1553 	ib.ptr[ib.length_dw++] = 1; /* z */
1554 	ib.ptr[ib.length_dw++] =
1555 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1556 
1557 	/* write CS partial flush packet */
1558 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1559 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1560 
1561 	/* SGPR2 */
1562 	/* write the register state for the compute dispatch */
1563 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1564 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1565 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1566 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1567 	}
1568 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1569 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1570 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1571 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1572 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1573 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1574 
1575 	/* write dispatch packet */
1576 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1577 	ib.ptr[ib.length_dw++] = 8; /* x */
1578 	ib.ptr[ib.length_dw++] = 1; /* y */
1579 	ib.ptr[ib.length_dw++] = 1; /* z */
1580 	ib.ptr[ib.length_dw++] =
1581 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1582 
1583 	/* write CS partial flush packet */
1584 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1585 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1586 
1587 	/* shedule the ib on the ring */
1588 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1589 	if (r) {
1590 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1591 		goto fail;
1592 	}
1593 
1594 	/* wait for the GPU to finish processing the IB */
1595 	r = fence_wait(f, false);
1596 	if (r) {
1597 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1598 		goto fail;
1599 	}
1600 
1601 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1602 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1603 	WREG32(mmGB_EDC_MODE, tmp);
1604 
1605 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1606 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1607 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1608 
1609 
1610 	/* read back registers to clear the counters */
1611 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1612 		RREG32(sec_ded_counter_registers[i]);
1613 
1614 fail:
1615 	fence_put(f);
1616 	amdgpu_ib_free(adev, &ib, NULL);
1617 	fence_put(f);
1618 
1619 	return r;
1620 }
1621 
/*
 * gfx_v8_0_gpu_early_init - derive the per-ASIC gfx configuration
 *
 * Fills in adev->gfx.config: shader engine / tile pipe / CU / backend
 * counts, scan-converter FIFO sizes, the golden GB_ADDR_CONFIG value
 * and the memory row size.  Most parts use hard-coded tables keyed on
 * adev->asic_type; Polaris parts query atombios instead, and
 * Carrizo/Stoney further refine the CU count from the PCI revision id.
 *
 * Returns 0 on success, or a negative error code if the atombios gfx
 * info query fails (Polaris only).
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		/* Fiji reuses the Tonga golden addr config */
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
		/* Polaris gets the SE/CU/backend topology from atombios
		 * rather than hard-coded tables. */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the specific Carrizo SKU,
		 * identified by PCI revision id. */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* Stoney SKU (3 vs 2 CUs) is also keyed on revision id. */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		/* unknown VI part: fall back to conservative Tonga-like
		 * settings */
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Cache the raw MC_ARB_RAMCFG for later consumers.
	 * NOTE(review): mc_shared_chmap is read but never used in this
	 * function as visible here - candidate for removal. */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive row size from the column count fuse;
		 * row bytes = 4 * 2^(8 + NOOFCOLS), capped at 4KB. */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1878 
/*
 * gfx_v8_0_sw_init - software-side setup for the gfx v8 IP block
 *
 * Registers the gfx interrupt sources, loads microcode, allocates the
 * RLC and MEC buffer objects, creates the gfx and compute rings,
 * reserves the GDS/GWS/OA gfx partitions and runs the early per-ASIC
 * config setup.  Order matters: each step depends on the previous one,
 * and on error the already-acquired resources are released by sw_fini.
 *
 * @handle: amdgpu_device pointer (IP block handle)
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* 8 queues per pipe: ring i maps to pipe i/8, queue i%8 */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	/* constant engine RAM size */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
1998 
1999 static int gfx_v8_0_sw_fini(void *handle)
2000 {
2001 	int i;
2002 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2003 
2004 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2005 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2006 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2007 
2008 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2009 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2010 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2011 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2012 
2013 	gfx_v8_0_mec_fini(adev);
2014 
2015 	gfx_v8_0_rlc_fini(adev);
2016 
2017 	gfx_v8_0_free_microcode(adev);
2018 
2019 	return 0;
2020 }
2021 
2022 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2023 {
2024 	uint32_t *modearray, *mod2array;
2025 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2026 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2027 	u32 reg_offset;
2028 
2029 	modearray = adev->gfx.config.tile_mode_array;
2030 	mod2array = adev->gfx.config.macrotile_mode_array;
2031 
2032 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2033 		modearray[reg_offset] = 0;
2034 
2035 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2036 		mod2array[reg_offset] = 0;
2037 
2038 	switch (adev->asic_type) {
2039 	case CHIP_TOPAZ:
2040 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2041 				PIPE_CONFIG(ADDR_SURF_P2) |
2042 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2043 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2044 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2045 				PIPE_CONFIG(ADDR_SURF_P2) |
2046 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2047 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2048 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2049 				PIPE_CONFIG(ADDR_SURF_P2) |
2050 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2051 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2052 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2053 				PIPE_CONFIG(ADDR_SURF_P2) |
2054 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2055 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2056 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2057 				PIPE_CONFIG(ADDR_SURF_P2) |
2058 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2059 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2060 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2061 				PIPE_CONFIG(ADDR_SURF_P2) |
2062 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2063 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2064 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2065 				PIPE_CONFIG(ADDR_SURF_P2) |
2066 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2067 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2068 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2069 				PIPE_CONFIG(ADDR_SURF_P2));
2070 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2071 				PIPE_CONFIG(ADDR_SURF_P2) |
2072 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2073 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2075 				 PIPE_CONFIG(ADDR_SURF_P2) |
2076 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2077 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2078 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2079 				 PIPE_CONFIG(ADDR_SURF_P2) |
2080 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2081 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2082 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2083 				 PIPE_CONFIG(ADDR_SURF_P2) |
2084 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2085 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087 				 PIPE_CONFIG(ADDR_SURF_P2) |
2088 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2089 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2091 				 PIPE_CONFIG(ADDR_SURF_P2) |
2092 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2093 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2095 				 PIPE_CONFIG(ADDR_SURF_P2) |
2096 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2097 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2098 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2099 				 PIPE_CONFIG(ADDR_SURF_P2) |
2100 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2101 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2102 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2103 				 PIPE_CONFIG(ADDR_SURF_P2) |
2104 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2105 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2106 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2107 				 PIPE_CONFIG(ADDR_SURF_P2) |
2108 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2109 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2110 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2111 				 PIPE_CONFIG(ADDR_SURF_P2) |
2112 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2113 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2114 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2115 				 PIPE_CONFIG(ADDR_SURF_P2) |
2116 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2117 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2118 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2119 				 PIPE_CONFIG(ADDR_SURF_P2) |
2120 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2121 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2122 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2123 				 PIPE_CONFIG(ADDR_SURF_P2) |
2124 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2125 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2126 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2127 				 PIPE_CONFIG(ADDR_SURF_P2) |
2128 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2129 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2130 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2131 				 PIPE_CONFIG(ADDR_SURF_P2) |
2132 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2133 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2134 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135 				 PIPE_CONFIG(ADDR_SURF_P2) |
2136 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2137 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2139 				 PIPE_CONFIG(ADDR_SURF_P2) |
2140 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2141 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2142 
2143 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2144 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2145 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2146 				NUM_BANKS(ADDR_SURF_8_BANK));
2147 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2148 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2149 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2150 				NUM_BANKS(ADDR_SURF_8_BANK));
2151 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2152 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2153 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2154 				NUM_BANKS(ADDR_SURF_8_BANK));
2155 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2157 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2158 				NUM_BANKS(ADDR_SURF_8_BANK));
2159 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2161 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2162 				NUM_BANKS(ADDR_SURF_8_BANK));
2163 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2164 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2165 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2166 				NUM_BANKS(ADDR_SURF_8_BANK));
2167 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2169 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170 				NUM_BANKS(ADDR_SURF_8_BANK));
2171 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2172 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2173 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2174 				NUM_BANKS(ADDR_SURF_16_BANK));
2175 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2176 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2177 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2178 				NUM_BANKS(ADDR_SURF_16_BANK));
2179 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2180 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2181 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2182 				 NUM_BANKS(ADDR_SURF_16_BANK));
2183 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2184 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2185 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2186 				 NUM_BANKS(ADDR_SURF_16_BANK));
2187 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2188 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2189 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2190 				 NUM_BANKS(ADDR_SURF_16_BANK));
2191 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2194 				 NUM_BANKS(ADDR_SURF_16_BANK));
2195 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2196 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2197 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2198 				 NUM_BANKS(ADDR_SURF_8_BANK));
2199 
2200 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2201 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2202 			    reg_offset != 23)
2203 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2204 
2205 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2206 			if (reg_offset != 7)
2207 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2208 
2209 		break;
2210 	case CHIP_FIJI:
2211 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2213 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2214 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2215 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2216 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2217 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2218 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2219 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2221 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2222 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2223 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2225 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2226 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2229 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2230 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2232 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2233 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2234 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2236 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2237 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2238 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2240 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2241 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2242 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2244 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2245 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2246 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2247 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2248 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2249 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2251 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2252 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2253 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2254 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2256 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2257 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2258 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2259 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2261 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2262 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2264 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2265 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2266 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2269 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2270 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2271 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2274 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2275 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2277 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2279 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2281 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2282 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2283 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2287 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2288 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2290 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2291 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2294 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2295 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2298 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2299 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2302 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2303 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2304 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2306 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2310 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2314 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2316 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2317 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2318 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2328 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2329 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2331 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2332 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2333 
2334 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2336 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337 				NUM_BANKS(ADDR_SURF_8_BANK));
2338 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2340 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341 				NUM_BANKS(ADDR_SURF_8_BANK));
2342 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2344 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2345 				NUM_BANKS(ADDR_SURF_8_BANK));
2346 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2348 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349 				NUM_BANKS(ADDR_SURF_8_BANK));
2350 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2352 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2353 				NUM_BANKS(ADDR_SURF_8_BANK));
2354 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2356 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2357 				NUM_BANKS(ADDR_SURF_8_BANK));
2358 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2360 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2361 				NUM_BANKS(ADDR_SURF_8_BANK));
2362 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2364 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365 				NUM_BANKS(ADDR_SURF_8_BANK));
2366 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2367 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2368 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2369 				NUM_BANKS(ADDR_SURF_8_BANK));
2370 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2373 				 NUM_BANKS(ADDR_SURF_8_BANK));
2374 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2377 				 NUM_BANKS(ADDR_SURF_8_BANK));
2378 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381 				 NUM_BANKS(ADDR_SURF_8_BANK));
2382 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385 				 NUM_BANKS(ADDR_SURF_8_BANK));
2386 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2388 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2389 				 NUM_BANKS(ADDR_SURF_4_BANK));
2390 
2391 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2392 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2393 
2394 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2395 			if (reg_offset != 7)
2396 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2397 
2398 		break;
	case CHIP_TONGA:
		/*
		 * GB_TILE_MODE0..30 values for Tonga.  Most entries use the
		 * 8-pipe ADDR_SURF_P8_32x32_16x16 pipe config; the PRT
		 * fallback entries 7, 12, 17, 23 and 30 use ADDR_SURF_P4_16x16
		 * instead.  Entries 0-4 are depth modes stepping the tile
		 * split from 64B up to 2KB; entry 8 is linear aligned.
		 * NOTE(review): the meaning of each table index follows the
		 * addrlib tile-mode numbering - confirm against
		 * gca/gfx_8_0_enum.h.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* entry 8: linear aligned - no tile split / micro tiling fields */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 bank width/height/aspect/bank-count
		 * values.  Index 7 is deliberately never initialized and the
		 * programming loop below skips it, so mmGB_MACROTILE_MODE7 is
		 * left at its existing value.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program all GB_TILE_MODEn registers from the table above. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODEn, skipping the unused index 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
		/*
		 * GB_TILE_MODE0..30 values for Polaris11.  Every entry uses
		 * the 4-pipe ADDR_SURF_P4_16x16 pipe config (unlike Tonga /
		 * Polaris10, which are 8-pipe with P4 PRT fallbacks).
		 * Entries 0-4 are depth modes stepping the tile split from
		 * 64B up to 2KB; entry 8 is linear aligned.  NOTE(review):
		 * index semantics follow the addrlib tile-mode numbering -
		 * confirm against gca/gfx_8_0_enum.h.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* entry 8: linear aligned - no tile split / micro tiling fields */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 bank settings.  Unlike the other
		 * ASIC tables here, entries 8 and 9 use BANK_WIDTH_2.
		 * Index 7 is never initialized and the write loop below
		 * skips it, leaving mmGB_MACROTILE_MODE7 untouched.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program all GB_TILE_MODEn registers from the table above. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODEn, skipping the unused index 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		/*
		 * GB_TILE_MODE0..30 values for Polaris10.  Most entries use
		 * the 8-pipe ADDR_SURF_P8_32x32_16x16 pipe config; the PRT
		 * fallback entries 7, 12, 17, 23 and 30 use ADDR_SURF_P4_16x16
		 * instead.  Entries 0-4 are depth modes stepping the tile
		 * split from 64B up to 2KB; entry 8 is linear aligned.
		 * NOTE(review): index semantics follow the addrlib tile-mode
		 * numbering - confirm against gca/gfx_8_0_enum.h.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* entry 8: linear aligned - no tile split / micro tiling fields */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 bank width/height/aspect/bank-count
		 * values.  Index 7 is never initialized and the write loop
		 * below skips it, leaving mmGB_MACROTILE_MODE7 untouched.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program all GB_TILE_MODEn registers from the table above. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODEn, skipping the unused index 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2992 	case CHIP_STONEY:
2993 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2994 				PIPE_CONFIG(ADDR_SURF_P2) |
2995 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2996 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2997 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998 				PIPE_CONFIG(ADDR_SURF_P2) |
2999 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3000 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3001 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3002 				PIPE_CONFIG(ADDR_SURF_P2) |
3003 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3004 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3005 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006 				PIPE_CONFIG(ADDR_SURF_P2) |
3007 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3008 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3009 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010 				PIPE_CONFIG(ADDR_SURF_P2) |
3011 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3012 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3013 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3014 				PIPE_CONFIG(ADDR_SURF_P2) |
3015 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3016 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3017 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3018 				PIPE_CONFIG(ADDR_SURF_P2) |
3019 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3020 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3021 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3022 				PIPE_CONFIG(ADDR_SURF_P2));
3023 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024 				PIPE_CONFIG(ADDR_SURF_P2) |
3025 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3026 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3027 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028 				 PIPE_CONFIG(ADDR_SURF_P2) |
3029 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3030 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3031 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3032 				 PIPE_CONFIG(ADDR_SURF_P2) |
3033 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3034 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3035 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3036 				 PIPE_CONFIG(ADDR_SURF_P2) |
3037 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3040 				 PIPE_CONFIG(ADDR_SURF_P2) |
3041 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3042 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3043 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3044 				 PIPE_CONFIG(ADDR_SURF_P2) |
3045 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3046 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3047 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3048 				 PIPE_CONFIG(ADDR_SURF_P2) |
3049 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3050 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3051 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3052 				 PIPE_CONFIG(ADDR_SURF_P2) |
3053 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3054 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3055 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3056 				 PIPE_CONFIG(ADDR_SURF_P2) |
3057 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3058 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3059 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3060 				 PIPE_CONFIG(ADDR_SURF_P2) |
3061 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3062 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3063 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3064 				 PIPE_CONFIG(ADDR_SURF_P2) |
3065 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3066 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3067 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3068 				 PIPE_CONFIG(ADDR_SURF_P2) |
3069 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3070 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3071 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3072 				 PIPE_CONFIG(ADDR_SURF_P2) |
3073 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3074 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3075 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3076 				 PIPE_CONFIG(ADDR_SURF_P2) |
3077 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3078 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3079 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3080 				 PIPE_CONFIG(ADDR_SURF_P2) |
3081 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3082 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3083 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3084 				 PIPE_CONFIG(ADDR_SURF_P2) |
3085 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3086 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3087 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3088 				 PIPE_CONFIG(ADDR_SURF_P2) |
3089 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3090 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3091 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3092 				 PIPE_CONFIG(ADDR_SURF_P2) |
3093 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3094 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3095 
3096 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3097 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3098 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3099 				NUM_BANKS(ADDR_SURF_8_BANK));
3100 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3102 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3103 				NUM_BANKS(ADDR_SURF_8_BANK));
3104 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3105 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3106 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3107 				NUM_BANKS(ADDR_SURF_8_BANK));
3108 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3109 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3110 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3111 				NUM_BANKS(ADDR_SURF_8_BANK));
3112 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3115 				NUM_BANKS(ADDR_SURF_8_BANK));
3116 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3118 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3119 				NUM_BANKS(ADDR_SURF_8_BANK));
3120 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3122 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3123 				NUM_BANKS(ADDR_SURF_8_BANK));
3124 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3125 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3126 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3127 				NUM_BANKS(ADDR_SURF_16_BANK));
3128 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3129 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3130 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3131 				NUM_BANKS(ADDR_SURF_16_BANK));
3132 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3133 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3134 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3135 				 NUM_BANKS(ADDR_SURF_16_BANK));
3136 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3137 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3138 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3139 				 NUM_BANKS(ADDR_SURF_16_BANK));
3140 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3143 				 NUM_BANKS(ADDR_SURF_16_BANK));
3144 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3146 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3147 				 NUM_BANKS(ADDR_SURF_16_BANK));
3148 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3151 				 NUM_BANKS(ADDR_SURF_8_BANK));
3152 
3153 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3154 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3155 			    reg_offset != 23)
3156 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3157 
3158 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3159 			if (reg_offset != 7)
3160 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3161 
3162 		break;
3163 	default:
3164 		dev_warn(adev->dev,
3165 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3166 			 adev->asic_type);
3167 
3168 	case CHIP_CARRIZO:
3169 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170 				PIPE_CONFIG(ADDR_SURF_P2) |
3171 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3172 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3173 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174 				PIPE_CONFIG(ADDR_SURF_P2) |
3175 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3176 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3177 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178 				PIPE_CONFIG(ADDR_SURF_P2) |
3179 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3180 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3181 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182 				PIPE_CONFIG(ADDR_SURF_P2) |
3183 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3184 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3185 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3186 				PIPE_CONFIG(ADDR_SURF_P2) |
3187 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3188 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3189 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3190 				PIPE_CONFIG(ADDR_SURF_P2) |
3191 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3192 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3193 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3194 				PIPE_CONFIG(ADDR_SURF_P2) |
3195 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3196 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3197 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3198 				PIPE_CONFIG(ADDR_SURF_P2));
3199 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3200 				PIPE_CONFIG(ADDR_SURF_P2) |
3201 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3202 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204 				 PIPE_CONFIG(ADDR_SURF_P2) |
3205 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3206 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3208 				 PIPE_CONFIG(ADDR_SURF_P2) |
3209 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3210 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3211 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3212 				 PIPE_CONFIG(ADDR_SURF_P2) |
3213 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3214 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216 				 PIPE_CONFIG(ADDR_SURF_P2) |
3217 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3218 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3219 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3220 				 PIPE_CONFIG(ADDR_SURF_P2) |
3221 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3222 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224 				 PIPE_CONFIG(ADDR_SURF_P2) |
3225 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3226 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3227 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3228 				 PIPE_CONFIG(ADDR_SURF_P2) |
3229 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3230 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3231 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3232 				 PIPE_CONFIG(ADDR_SURF_P2) |
3233 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3234 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3235 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3236 				 PIPE_CONFIG(ADDR_SURF_P2) |
3237 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3238 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3239 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3240 				 PIPE_CONFIG(ADDR_SURF_P2) |
3241 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3242 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3243 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3244 				 PIPE_CONFIG(ADDR_SURF_P2) |
3245 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3246 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3247 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3248 				 PIPE_CONFIG(ADDR_SURF_P2) |
3249 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3250 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3251 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3252 				 PIPE_CONFIG(ADDR_SURF_P2) |
3253 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3254 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3255 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3256 				 PIPE_CONFIG(ADDR_SURF_P2) |
3257 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3258 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3259 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3260 				 PIPE_CONFIG(ADDR_SURF_P2) |
3261 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3262 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3263 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3264 				 PIPE_CONFIG(ADDR_SURF_P2) |
3265 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3266 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3268 				 PIPE_CONFIG(ADDR_SURF_P2) |
3269 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3270 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3271 
3272 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3274 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3275 				NUM_BANKS(ADDR_SURF_8_BANK));
3276 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3278 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3279 				NUM_BANKS(ADDR_SURF_8_BANK));
3280 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283 				NUM_BANKS(ADDR_SURF_8_BANK));
3284 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3285 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3286 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3287 				NUM_BANKS(ADDR_SURF_8_BANK));
3288 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3289 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3290 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3291 				NUM_BANKS(ADDR_SURF_8_BANK));
3292 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3293 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3294 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3295 				NUM_BANKS(ADDR_SURF_8_BANK));
3296 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3297 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3298 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3299 				NUM_BANKS(ADDR_SURF_8_BANK));
3300 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3301 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3302 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 				NUM_BANKS(ADDR_SURF_16_BANK));
3304 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3305 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 				NUM_BANKS(ADDR_SURF_16_BANK));
3308 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3309 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3310 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311 				 NUM_BANKS(ADDR_SURF_16_BANK));
3312 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3313 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3314 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315 				 NUM_BANKS(ADDR_SURF_16_BANK));
3316 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 				 NUM_BANKS(ADDR_SURF_16_BANK));
3320 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3322 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3323 				 NUM_BANKS(ADDR_SURF_16_BANK));
3324 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327 				 NUM_BANKS(ADDR_SURF_8_BANK));
3328 
3329 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3330 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3331 			    reg_offset != 23)
3332 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3333 
3334 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3335 			if (reg_offset != 7)
3336 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3337 
3338 		break;
3339 	}
3340 }
3341 
3342 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3343 {
3344 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3345 
3346 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3347 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3348 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3349 	} else if (se_num == 0xffffffff) {
3350 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3351 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3352 	} else if (sh_num == 0xffffffff) {
3353 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3354 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3355 	} else {
3356 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3357 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3358 	}
3359 	WREG32(mmGRBM_GFX_INDEX, data);
3360 }
3361 
/* Return a mask with the low bit_width bits set. */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	/* 64-bit intermediate keeps bit_width == 32 well defined. */
	u64 mask = (1ULL << bit_width) - 1ULL;

	return (u32)mask;
}
3366 
3367 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3368 {
3369 	u32 data, mask;
3370 
3371 	data = RREG32(mmCC_RB_BACKEND_DISABLE);
3372 	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3373 
3374 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3375 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3376 
3377 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3378 				       adev->gfx.config.max_sh_per_se);
3379 
3380 	return (~data) & mask;
3381 }
3382 
3383 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3384 {
3385 	int i, j;
3386 	u32 data;
3387 	u32 active_rbs = 0;
3388 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3389 					adev->gfx.config.max_sh_per_se;
3390 
3391 	mutex_lock(&adev->grbm_idx_mutex);
3392 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3393 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3394 			gfx_v8_0_select_se_sh(adev, i, j);
3395 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3396 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3397 					       rb_bitmap_width_per_sh);
3398 		}
3399 	}
3400 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3401 	mutex_unlock(&adev->grbm_idx_mutex);
3402 
3403 	adev->gfx.config.backend_enable_mask = active_rbs;
3404 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3405 }
3406 
/**
 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
3415 #define DEFAULT_SH_MEM_BASES	(0x6000)
3416 #define FIRST_COMPUTE_VMID	(8)
3417 #define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* Same aperture base goes into both halves of SH_MEM_BASES. */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, MTYPE_CC as the
	 * default mtype, and the private-aperture ATC bit set.
	 */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* Apply the same setup to every compute VMID (8..15). */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* base(1) > limit(0) — presumably leaves APE1 unused; confirm */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Back to VMID 0 before dropping the SRBM lock. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3451 
3452 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3453 {
3454 	u32 tmp;
3455 	int i;
3456 
3457 	tmp = RREG32(mmGRBM_CNTL);
3458 	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3459 	WREG32(mmGRBM_CNTL, tmp);
3460 
3461 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3462 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3463 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3464 
3465 	gfx_v8_0_tiling_mode_table_init(adev);
3466 
3467 	gfx_v8_0_setup_rb(adev);
3468 	gfx_v8_0_get_cu_info(adev);
3469 
3470 	/* XXX SH_MEM regs */
3471 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3472 	mutex_lock(&adev->srbm_mutex);
3473 	for (i = 0; i < 16; i++) {
3474 		vi_srbm_select(adev, 0, 0, 0, i);
3475 		/* CP and shaders */
3476 		if (i == 0) {
3477 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3478 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3479 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3480 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3481 			WREG32(mmSH_MEM_CONFIG, tmp);
3482 		} else {
3483 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3484 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3485 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3486 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3487 			WREG32(mmSH_MEM_CONFIG, tmp);
3488 		}
3489 
3490 		WREG32(mmSH_MEM_APE1_BASE, 1);
3491 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3492 		WREG32(mmSH_MEM_BASES, 0);
3493 	}
3494 	vi_srbm_select(adev, 0, 0, 0, 0);
3495 	mutex_unlock(&adev->srbm_mutex);
3496 
3497 	gfx_v8_0_init_compute_vmid(adev);
3498 
3499 	mutex_lock(&adev->grbm_idx_mutex);
3500 	/*
3501 	 * making sure that the following register writes will be broadcasted
3502 	 * to all the shaders
3503 	 */
3504 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3505 
3506 	WREG32(mmPA_SC_FIFO_SIZE,
3507 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3508 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3509 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3510 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3511 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3512 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3513 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3514 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3515 	mutex_unlock(&adev->grbm_idx_mutex);
3516 
3517 }
3518 
/* Poll the RLC serdes busy registers until every master reports idle or
 * the per-instance usec timeout expires (timeouts are not reported).
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* First poll the CU master of every SE/SH instance. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* Restore broadcast mode before releasing the index lock. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Then wait for the non-CU masters (SE, GC, TC0, TC1) as well. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3548 
3549 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3550 					       bool enable)
3551 {
3552 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3553 
3554 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3555 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3556 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3557 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3558 
3559 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3560 }
3561 
3562 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3563 {
3564 	/* csib */
3565 	WREG32(mmRLC_CSIB_ADDR_HI,
3566 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3567 	WREG32(mmRLC_CSIB_ADDR_LO,
3568 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3569 	WREG32(mmRLC_CSIB_LENGTH,
3570 			adev->gfx.rlc.clear_state_size);
3571 }
3572 
/* Parse the RLC indirect register-list format blob in place.
 *
 * Entries are runs of 3-dword records terminated by 0xFFFFFFFF.  For each
 * entry the start offset is recorded in ind_start_offsets, and the third
 * dword of each record (an index register) is collected into unique_indices
 * and rewritten to its slot number in that table.
 *
 * Fix: the capacity BUG_ON checks used to run AFTER the store and the
 * counter increment, so a legitimately full table tripped the BUG one
 * entry early; the checks now run before the store.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			/* check capacity before writing the start offset */
			BUG_ON(*offset_count >= max_offset);
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
		}

		/* 0xFFFFFFFF terminates the current entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the index dword of this 3-dword record */
		ind_offset += 2;

		/* look for an already-collected matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* new index: check capacity before storing it */
			BUG_ON(*indices_count >= max_indices);
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
		}

		/* replace the raw index with its slot number */
		register_list_format[ind_offset] = indices;
	}
}
3622 
/* Program the RLC save/restore machinery: the direct register-restore
 * list, the (parsed) indirect format list, the list size, the per-entry
 * start offsets, and the unique index registers.
 *
 * Returns 0 on success or -ENOMEM if the scratch copy cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a scratch copy: parsing rewrites index dwords in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (register_list_format == NULL)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	/* collect unique indices and per-entry start offsets */
	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	temp = RREG32(mmRLC_SRM_CNTL);
	temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
	WREG32(mmRLC_SRM_CNTL, temp);

	/* stream the register-restore list into SRM ARAM (auto-increment) */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* dword count halved — presumably programmed in register pairs; confirm */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: low 18 bits -> CNTL_ADDR_n, bits 20+ -> CNTL_DATA_n */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
	}
	kfree(register_list_format);

	return 0;
}
3686 
3687 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3688 {
3689 	uint32_t data;
3690 
3691 	data = RREG32(mmRLC_SRM_CNTL);
3692 	data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3693 	WREG32(mmRLC_SRM_CNTL, data);
3694 }
3695 
/* Program Polaris11 powergating delays and thresholds.  Only done when
 * one of the GFX powergating modes (PG/SMG/DMG) is enabled.
 */
static void polaris11_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			AMD_PG_SUPPORT_GFX_SMG |
			AMD_PG_SUPPORT_GFX_DMG)) {
		/* RB wptr idle poll count -> 0x60 */
		data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		WREG32(mmCP_RB_WPTR_POLL_CNTL, data);

		/* all four PG delay fields set to 0x10 */
		data = 0;
		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
		WREG32(mmRLC_PG_DELAY, data);

		/* serdes command delay -> 0x3 */
		data = RREG32(mmRLC_PG_DELAY_2);
		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
		data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
		WREG32(mmRLC_PG_DELAY_2, data);

		/* GRBM register-save GFX idle threshold -> 0x55f0 */
		data = RREG32(mmRLC_AUTO_PG_CTRL);
		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
		WREG32(mmRLC_AUTO_PG_CTRL, data);
	}
}
3726 
3727 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3728 {
3729 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3730 			      AMD_PG_SUPPORT_GFX_SMG |
3731 			      AMD_PG_SUPPORT_GFX_DMG |
3732 			      AMD_PG_SUPPORT_CP |
3733 			      AMD_PG_SUPPORT_GDS |
3734 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3735 		gfx_v8_0_init_csb(adev);
3736 		gfx_v8_0_init_save_restore_list(adev);
3737 		gfx_v8_0_enable_save_restore_machine(adev);
3738 
3739 		if (adev->asic_type == CHIP_POLARIS11)
3740 			polaris11_init_power_gating(adev);
3741 	}
3742 }
3743 
3744 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3745 {
3746 	u32 tmp = RREG32(mmRLC_CNTL);
3747 
3748 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3749 	WREG32(mmRLC_CNTL, tmp);
3750 
3751 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3752 
3753 	gfx_v8_0_wait_for_rlc_serdes(adev);
3754 }
3755 
3756 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3757 {
3758 	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3759 
3760 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3761 	WREG32(mmGRBM_SOFT_RESET, tmp);
3762 	udelay(50);
3763 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3764 	WREG32(mmGRBM_SOFT_RESET, tmp);
3765 	udelay(50);
3766 }
3767 
3768 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3769 {
3770 	u32 tmp = RREG32(mmRLC_CNTL);
3771 
3772 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3773 	WREG32(mmRLC_CNTL, tmp);
3774 
3775 	/* carrizo do enable cp interrupt after cp inited */
3776 	if (!(adev->flags & AMD_IS_APU))
3777 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3778 
3779 	udelay(50);
3780 }
3781 
3782 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3783 {
3784 	const struct rlc_firmware_header_v2_0 *hdr;
3785 	const __le32 *fw_data;
3786 	unsigned i, fw_size;
3787 
3788 	if (!adev->gfx.rlc_fw)
3789 		return -EINVAL;
3790 
3791 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3792 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3793 
3794 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3795 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3796 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3797 
3798 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3799 	for (i = 0; i < fw_size; i++)
3800 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3801 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3802 
3803 	return 0;
3804 }
3805 
3806 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3807 {
3808 	int r;
3809 
3810 	gfx_v8_0_rlc_stop(adev);
3811 
3812 	/* disable CG */
3813 	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3814 	if (adev->asic_type == CHIP_POLARIS11 ||
3815 		adev->asic_type == CHIP_POLARIS10)
3816 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3817 
3818 	/* disable PG */
3819 	WREG32(mmRLC_PG_CNTL, 0);
3820 
3821 	gfx_v8_0_rlc_reset(adev);
3822 
3823 	gfx_v8_0_init_pg(adev);
3824 
3825 	if (!adev->pp_enabled) {
3826 		if (!adev->firmware.smu_load) {
3827 			/* legacy rlc firmware loading */
3828 			r = gfx_v8_0_rlc_load_microcode(adev);
3829 			if (r)
3830 				return r;
3831 		} else {
3832 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3833 							AMDGPU_UCODE_ID_RLC_G);
3834 			if (r)
3835 				return -EINVAL;
3836 		}
3837 	}
3838 
3839 	gfx_v8_0_rlc_start(adev);
3840 
3841 	return 0;
3842 }
3843 
3844 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3845 {
3846 	int i;
3847 	u32 tmp = RREG32(mmCP_ME_CNTL);
3848 
3849 	if (enable) {
3850 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3851 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3852 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3853 	} else {
3854 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3855 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3856 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3857 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3858 			adev->gfx.gfx_ring[i].ready = false;
3859 	}
3860 	WREG32(mmCP_ME_CNTL, tmp);
3861 	udelay(50);
3862 }
3863 
3864 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3865 {
3866 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3867 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3868 	const struct gfx_firmware_header_v1_0 *me_hdr;
3869 	const __le32 *fw_data;
3870 	unsigned i, fw_size;
3871 
3872 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3873 		return -EINVAL;
3874 
3875 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3876 		adev->gfx.pfp_fw->data;
3877 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3878 		adev->gfx.ce_fw->data;
3879 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3880 		adev->gfx.me_fw->data;
3881 
3882 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3883 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3884 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3885 
3886 	gfx_v8_0_cp_gfx_enable(adev, false);
3887 
3888 	/* PFP */
3889 	fw_data = (const __le32 *)
3890 		(adev->gfx.pfp_fw->data +
3891 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3892 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3893 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
3894 	for (i = 0; i < fw_size; i++)
3895 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3896 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3897 
3898 	/* CE */
3899 	fw_data = (const __le32 *)
3900 		(adev->gfx.ce_fw->data +
3901 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3902 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3903 	WREG32(mmCP_CE_UCODE_ADDR, 0);
3904 	for (i = 0; i < fw_size; i++)
3905 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3906 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3907 
3908 	/* ME */
3909 	fw_data = (const __le32 *)
3910 		(adev->gfx.me_fw->data +
3911 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3912 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3913 	WREG32(mmCP_ME_RAM_WADDR, 0);
3914 	for (i = 0; i < fw_size; i++)
3915 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3916 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3917 
3918 	return 0;
3919 }
3920 
3921 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3922 {
3923 	u32 count = 0;
3924 	const struct cs_section_def *sect = NULL;
3925 	const struct cs_extent_def *ext = NULL;
3926 
3927 	/* begin clear state */
3928 	count += 2;
3929 	/* context control state */
3930 	count += 3;
3931 
3932 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3933 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3934 			if (sect->id == SECT_CONTEXT)
3935 				count += 2 + ext->reg_count;
3936 			else
3937 				return 0;
3938 		}
3939 	}
3940 	/* pa_sc_raster_config/pa_sc_raster_config1 */
3941 	count += 4;
3942 	/* end clear state */
3943 	count += 2;
3944 	/* clear state */
3945 	count += 2;
3946 
3947 	return count;
3948 }
3949 
/*
 * gfx_v8_0_cp_gfx_start - program the gfx CP and emit the clear-state init
 *
 * Writes basic CP config registers, un-halts the gfx CP and submits the
 * PM4 initialization sequence on gfx ring 0: clear-state preamble,
 * context control, the SECT_CONTEXT register extents from vi_cs_data,
 * per-ASIC raster config values, CLEAR_STATE and the CE partition bases.
 * The ring allocation size must stay in sync with gfx_v8_0_get_csb_size().
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the SET_BASE packet appended after the CSB */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as one SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC values for the two context registers starting at
	 * mmPA_SC_RASTER_CONFIG (raster_config / raster_config1)
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* value depends on the number of render backends */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4041 
/*
 * gfx_v8_0_cp_gfx_resume - bring up gfx ring 0
 *
 * Programs the CP ring buffer registers (size, pointers, rptr writeback
 * address, base address), configures the gfx doorbell where supported,
 * then starts the CP via gfx_v8_0_cp_gfx_start() and ring-tests the ring.
 * Returns 0 on success or the ring test error.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size (register field is log2 of the dword count) */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers
	 * (RPTR_WR_ENA is set temporarily so the rptr reset takes effect)
	 */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address registers take the address shifted right by 8 */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* restrict the doorbell aperture on tonga */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
4124 
4125 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4126 {
4127 	int i;
4128 
4129 	if (enable) {
4130 		WREG32(mmCP_MEC_CNTL, 0);
4131 	} else {
4132 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4133 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4134 			adev->gfx.compute_ring[i].ready = false;
4135 	}
4136 	udelay(50);
4137 }
4138 
4139 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4140 {
4141 	const struct gfx_firmware_header_v1_0 *mec_hdr;
4142 	const __le32 *fw_data;
4143 	unsigned i, fw_size;
4144 
4145 	if (!adev->gfx.mec_fw)
4146 		return -EINVAL;
4147 
4148 	gfx_v8_0_cp_compute_enable(adev, false);
4149 
4150 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4151 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4152 
4153 	fw_data = (const __le32 *)
4154 		(adev->gfx.mec_fw->data +
4155 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4156 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4157 
4158 	/* MEC1 */
4159 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4160 	for (i = 0; i < fw_size; i++)
4161 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4162 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4163 
4164 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4165 	if (adev->gfx.mec2_fw) {
4166 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4167 
4168 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4169 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4170 
4171 		fw_data = (const __le32 *)
4172 			(adev->gfx.mec2_fw->data +
4173 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4174 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4175 
4176 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4177 		for (i = 0; i < fw_size; i++)
4178 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4179 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4180 	}
4181 
4182 	return 0;
4183 }
4184 
/*
 * Memory queue descriptor (MQD) for a VI compute hardware queue.
 *
 * Each compute ring gets one of these in a GTT buffer object (see
 * gfx_v8_0_cp_compute_resume()).  The layout is fixed by the CP
 * microcode: one 32-bit field per ordinal, so field order and size
 * must not change.  The cp_hqd_*/cp_mqd_* fields mirror the HQD/MQD
 * registers of the same names.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
4445 
4446 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4447 {
4448 	int i, r;
4449 
4450 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4451 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4452 
4453 		if (ring->mqd_obj) {
4454 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4455 			if (unlikely(r != 0))
4456 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4457 
4458 			amdgpu_bo_unpin(ring->mqd_obj);
4459 			amdgpu_bo_unreserve(ring->mqd_obj);
4460 
4461 			amdgpu_bo_unref(&ring->mqd_obj);
4462 			ring->mqd_obj = NULL;
4463 		}
4464 	}
4465 }
4466 
4467 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4468 {
4469 	int r, i, j;
4470 	u32 tmp;
4471 	bool use_doorbell = true;
4472 	u64 hqd_gpu_addr;
4473 	u64 mqd_gpu_addr;
4474 	u64 eop_gpu_addr;
4475 	u64 wb_gpu_addr;
4476 	u32 *buf;
4477 	struct vi_mqd *mqd;
4478 
4479 	/* init the pipes */
4480 	mutex_lock(&adev->srbm_mutex);
4481 	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4482 		int me = (i < 4) ? 1 : 2;
4483 		int pipe = (i < 4) ? i : (i - 4);
4484 
4485 		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4486 		eop_gpu_addr >>= 8;
4487 
4488 		vi_srbm_select(adev, me, pipe, 0, 0);
4489 
4490 		/* write the EOP addr */
4491 		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4492 		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4493 
4494 		/* set the VMID assigned */
4495 		WREG32(mmCP_HQD_VMID, 0);
4496 
4497 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4498 		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4499 		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4500 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
4501 		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4502 	}
4503 	vi_srbm_select(adev, 0, 0, 0, 0);
4504 	mutex_unlock(&adev->srbm_mutex);
4505 
4506 	/* init the queues.  Just two for now. */
4507 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4508 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4509 
4510 		if (ring->mqd_obj == NULL) {
4511 			r = amdgpu_bo_create(adev,
4512 					     sizeof(struct vi_mqd),
4513 					     PAGE_SIZE, true,
4514 					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4515 					     NULL, &ring->mqd_obj);
4516 			if (r) {
4517 				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4518 				return r;
4519 			}
4520 		}
4521 
4522 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4523 		if (unlikely(r != 0)) {
4524 			gfx_v8_0_cp_compute_fini(adev);
4525 			return r;
4526 		}
4527 		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4528 				  &mqd_gpu_addr);
4529 		if (r) {
4530 			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4531 			gfx_v8_0_cp_compute_fini(adev);
4532 			return r;
4533 		}
4534 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4535 		if (r) {
4536 			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4537 			gfx_v8_0_cp_compute_fini(adev);
4538 			return r;
4539 		}
4540 
4541 		/* init the mqd struct */
4542 		memset(buf, 0, sizeof(struct vi_mqd));
4543 
4544 		mqd = (struct vi_mqd *)buf;
4545 		mqd->header = 0xC0310800;
4546 		mqd->compute_pipelinestat_enable = 0x00000001;
4547 		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4548 		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4549 		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4550 		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4551 		mqd->compute_misc_reserved = 0x00000003;
4552 
4553 		mutex_lock(&adev->srbm_mutex);
4554 		vi_srbm_select(adev, ring->me,
4555 			       ring->pipe,
4556 			       ring->queue, 0);
4557 
4558 		/* disable wptr polling */
4559 		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4560 		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4561 		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4562 
4563 		mqd->cp_hqd_eop_base_addr_lo =
4564 			RREG32(mmCP_HQD_EOP_BASE_ADDR);
4565 		mqd->cp_hqd_eop_base_addr_hi =
4566 			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4567 
4568 		/* enable doorbell? */
4569 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4570 		if (use_doorbell) {
4571 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4572 		} else {
4573 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4574 		}
4575 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4576 		mqd->cp_hqd_pq_doorbell_control = tmp;
4577 
4578 		/* disable the queue if it's active */
4579 		mqd->cp_hqd_dequeue_request = 0;
4580 		mqd->cp_hqd_pq_rptr = 0;
4581 		mqd->cp_hqd_pq_wptr= 0;
4582 		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4583 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4584 			for (j = 0; j < adev->usec_timeout; j++) {
4585 				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4586 					break;
4587 				udelay(1);
4588 			}
4589 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4590 			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4591 			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4592 		}
4593 
4594 		/* set the pointer to the MQD */
4595 		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4596 		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4597 		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4598 		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4599 
4600 		/* set MQD vmid to 0 */
4601 		tmp = RREG32(mmCP_MQD_CONTROL);
4602 		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4603 		WREG32(mmCP_MQD_CONTROL, tmp);
4604 		mqd->cp_mqd_control = tmp;
4605 
4606 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4607 		hqd_gpu_addr = ring->gpu_addr >> 8;
4608 		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4609 		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4610 		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4611 		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4612 
4613 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4614 		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4615 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4616 				    (order_base_2(ring->ring_size / 4) - 1));
4617 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4618 			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4619 #ifdef __BIG_ENDIAN
4620 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4621 #endif
4622 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4623 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4624 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4625 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4626 		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4627 		mqd->cp_hqd_pq_control = tmp;
4628 
4629 		/* set the wb address wether it's enabled or not */
4630 		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4631 		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4632 		mqd->cp_hqd_pq_rptr_report_addr_hi =
4633 			upper_32_bits(wb_gpu_addr) & 0xffff;
4634 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4635 		       mqd->cp_hqd_pq_rptr_report_addr_lo);
4636 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4637 		       mqd->cp_hqd_pq_rptr_report_addr_hi);
4638 
4639 		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4640 		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4641 		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4642 		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4643 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4644 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4645 		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4646 
4647 		/* enable the doorbell if requested */
4648 		if (use_doorbell) {
4649 			if ((adev->asic_type == CHIP_CARRIZO) ||
4650 			    (adev->asic_type == CHIP_FIJI) ||
4651 			    (adev->asic_type == CHIP_STONEY) ||
4652 			    (adev->asic_type == CHIP_POLARIS11) ||
4653 			    (adev->asic_type == CHIP_POLARIS10)) {
4654 				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4655 				       AMDGPU_DOORBELL_KIQ << 2);
4656 				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4657 				       AMDGPU_DOORBELL_MEC_RING7 << 2);
4658 			}
4659 			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4660 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4661 					    DOORBELL_OFFSET, ring->doorbell_index);
4662 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4663 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4664 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4665 			mqd->cp_hqd_pq_doorbell_control = tmp;
4666 
4667 		} else {
4668 			mqd->cp_hqd_pq_doorbell_control = 0;
4669 		}
4670 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4671 		       mqd->cp_hqd_pq_doorbell_control);
4672 
4673 		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4674 		ring->wptr = 0;
4675 		mqd->cp_hqd_pq_wptr = ring->wptr;
4676 		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4677 		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4678 
4679 		/* set the vmid for the queue */
4680 		mqd->cp_hqd_vmid = 0;
4681 		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4682 
4683 		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4684 		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4685 		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4686 		mqd->cp_hqd_persistent_state = tmp;
4687 		if (adev->asic_type == CHIP_STONEY ||
4688 			adev->asic_type == CHIP_POLARIS11 ||
4689 			adev->asic_type == CHIP_POLARIS10) {
4690 			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4691 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4692 			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4693 		}
4694 
4695 		/* activate the queue */
4696 		mqd->cp_hqd_active = 1;
4697 		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4698 
4699 		vi_srbm_select(adev, 0, 0, 0, 0);
4700 		mutex_unlock(&adev->srbm_mutex);
4701 
4702 		amdgpu_bo_kunmap(ring->mqd_obj);
4703 		amdgpu_bo_unreserve(ring->mqd_obj);
4704 	}
4705 
4706 	if (use_doorbell) {
4707 		tmp = RREG32(mmCP_PQ_STATUS);
4708 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4709 		WREG32(mmCP_PQ_STATUS, tmp);
4710 	}
4711 
4712 	gfx_v8_0_cp_compute_enable(adev, true);
4713 
4714 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4715 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4716 
4717 		ring->ready = true;
4718 		r = amdgpu_ring_test_ring(ring);
4719 		if (r)
4720 			ring->ready = false;
4721 	}
4722 
4723 	return 0;
4724 }
4725 
4726 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4727 {
4728 	int r;
4729 
4730 	if (!(adev->flags & AMD_IS_APU))
4731 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4732 
4733 	if (!adev->pp_enabled) {
4734 		if (!adev->firmware.smu_load) {
4735 			/* legacy firmware loading */
4736 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
4737 			if (r)
4738 				return r;
4739 
4740 			r = gfx_v8_0_cp_compute_load_microcode(adev);
4741 			if (r)
4742 				return r;
4743 		} else {
4744 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4745 							AMDGPU_UCODE_ID_CP_CE);
4746 			if (r)
4747 				return -EINVAL;
4748 
4749 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4750 							AMDGPU_UCODE_ID_CP_PFP);
4751 			if (r)
4752 				return -EINVAL;
4753 
4754 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4755 							AMDGPU_UCODE_ID_CP_ME);
4756 			if (r)
4757 				return -EINVAL;
4758 
4759 			if (adev->asic_type == CHIP_TOPAZ) {
4760 				r = gfx_v8_0_cp_compute_load_microcode(adev);
4761 				if (r)
4762 					return r;
4763 			} else {
4764 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4765 										 AMDGPU_UCODE_ID_CP_MEC1);
4766 				if (r)
4767 					return -EINVAL;
4768 			}
4769 		}
4770 	}
4771 
4772 	r = gfx_v8_0_cp_gfx_resume(adev);
4773 	if (r)
4774 		return r;
4775 
4776 	r = gfx_v8_0_cp_compute_resume(adev);
4777 	if (r)
4778 		return r;
4779 
4780 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4781 
4782 	return 0;
4783 }
4784 
/* Enable or disable both command processors (gfx and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4790 
/*
 * gfx_v8_0_hw_init - hw init callback for the gfx v8 IP block
 * @handle: amdgpu device pointer (as void *)
 *
 * Applies golden register settings, initializes the gfx engine, then
 * resumes the RLC and the command processors.  Returns 0 on success or
 * a negative error code from the first step that fails.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4810 
/*
 * gfx_v8_0_hw_fini - hw teardown callback for the gfx v8 IP block
 * @handle: amdgpu device pointer (as void *)
 *
 * Order matters here: interrupts are released first, then the command
 * processors are halted before the RLC is stopped.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* release the privileged register/instruction fault interrupts */
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	/* presumably releases compute MQD/HPD resources — confirm in
	 * gfx_v8_0_cp_compute_fini() */
	gfx_v8_0_cp_compute_fini(adev);

	/* ungate gfx so a later hw_init starts from a known power state */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4826 
/* Suspend is a full hw teardown for gfx v8. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4833 
/* Resume re-runs the full hw init sequence. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4840 
4841 static bool gfx_v8_0_is_idle(void *handle)
4842 {
4843 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4844 
4845 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4846 		return false;
4847 	else
4848 		return true;
4849 }
4850 
/*
 * gfx_v8_0_wait_for_idle - poll GRBM_STATUS until the gfx block is idle
 * @handle: amdgpu device pointer (as void *)
 *
 * Returns 0 once GUI_ACTIVE clears, -ETIMEDOUT after usec_timeout polls.
 */
static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read GRBM_STATUS, keeping only the GUI_ACTIVE bit */
		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
4867 
/*
 * gfx_v8_0_soft_reset - soft reset the gfx engine blocks that report busy
 * @handle: amdgpu device pointer (as void *)
 *
 * Inspects GRBM/SRBM status registers, accumulates the needed soft-reset
 * bits, then pulses them with the pipeline stalled via GMCON_DEBUG.
 * Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		/* NOTE(review): this inner condition duplicates the enclosing
		 * one, so it is always true here; kept as-is */
		if (grbm_soft_reset || srbm_soft_reset) {
			/* stall the gfx pipe and clear it before resetting */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		if (grbm_soft_reset) {
			/* assert, hold for 50us, then deassert the reset bits;
			 * the read-backs flush the posted writes */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			/* same assert/deassert pulse on the SRBM side */
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		if (grbm_soft_reset || srbm_soft_reset) {
			/* release the stall/clear applied above */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}
4970 
4971 /**
4972  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4973  *
4974  * @adev: amdgpu_device pointer
4975  *
4976  * Fetches a GPU clock counter snapshot.
4977  * Returns the 64 bit clock counter snapshot.
4978  */
4979 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4980 {
4981 	uint64_t clock;
4982 
4983 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4984 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4985 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4986 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4987 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4988 	return clock;
4989 }
4990 
4991 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4992 					  uint32_t vmid,
4993 					  uint32_t gds_base, uint32_t gds_size,
4994 					  uint32_t gws_base, uint32_t gws_size,
4995 					  uint32_t oa_base, uint32_t oa_size)
4996 {
4997 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4998 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4999 
5000 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5001 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5002 
5003 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5004 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5005 
5006 	/* GDS Base */
5007 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5008 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5009 				WRITE_DATA_DST_SEL(0)));
5010 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5011 	amdgpu_ring_write(ring, 0);
5012 	amdgpu_ring_write(ring, gds_base);
5013 
5014 	/* GDS Size */
5015 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5016 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5017 				WRITE_DATA_DST_SEL(0)));
5018 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5019 	amdgpu_ring_write(ring, 0);
5020 	amdgpu_ring_write(ring, gds_size);
5021 
5022 	/* GWS */
5023 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5024 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5025 				WRITE_DATA_DST_SEL(0)));
5026 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5027 	amdgpu_ring_write(ring, 0);
5028 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5029 
5030 	/* OA */
5031 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5032 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5033 				WRITE_DATA_DST_SEL(0)));
5034 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5035 	amdgpu_ring_write(ring, 0);
5036 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5037 }
5038 
/*
 * gfx_v8_0_early_init - early init callback for the gfx v8 IP block
 * @handle: amdgpu device pointer (as void *)
 *
 * Sets the fixed ring counts and installs the gfx v8 callback tables.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5052 
/*
 * gfx_v8_0_late_init - late init callback for the gfx v8 IP block
 * @handle: amdgpu device pointer (as void *)
 *
 * Enables the privileged-access fault interrupts, runs the EDC GPR
 * workarounds (which need a working IB pool) and finally allows gfx
 * powergating.  Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	/* gate gfx now that initialization is complete */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5076 
5077 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5078 		bool enable)
5079 {
5080 	uint32_t data, temp;
5081 
5082 	/* Send msg to SMU via Powerplay */
5083 	amdgpu_set_powergating_state(adev,
5084 			AMD_IP_BLOCK_TYPE_SMC,
5085 			enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5086 
5087 	if (enable) {
5088 		/* Enable static MGPG */
5089 		temp = data = RREG32(mmRLC_PG_CNTL);
5090 		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5091 
5092 		if (temp != data)
5093 			WREG32(mmRLC_PG_CNTL, data);
5094 	} else {
5095 		temp = data = RREG32(mmRLC_PG_CNTL);
5096 		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5097 
5098 		if (temp != data)
5099 			WREG32(mmRLC_PG_CNTL, data);
5100 	}
5101 }
5102 
5103 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5104 		bool enable)
5105 {
5106 	uint32_t data, temp;
5107 
5108 	if (enable) {
5109 		/* Enable dynamic MGPG */
5110 		temp = data = RREG32(mmRLC_PG_CNTL);
5111 		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5112 
5113 		if (temp != data)
5114 			WREG32(mmRLC_PG_CNTL, data);
5115 	} else {
5116 		temp = data = RREG32(mmRLC_PG_CNTL);
5117 		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5118 
5119 		if (temp != data)
5120 			WREG32(mmRLC_PG_CNTL, data);
5121 	}
5122 }
5123 
5124 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5125 		bool enable)
5126 {
5127 	uint32_t data, temp;
5128 
5129 	if (enable) {
5130 		/* Enable quick PG */
5131 		temp = data = RREG32(mmRLC_PG_CNTL);
5132 		data |= 0x100000;
5133 
5134 		if (temp != data)
5135 			WREG32(mmRLC_PG_CNTL, data);
5136 	} else {
5137 		temp = data = RREG32(mmRLC_PG_CNTL);
5138 		data &= ~0x100000;
5139 
5140 		if (temp != data)
5141 			WREG32(mmRLC_PG_CNTL, data);
5142 	}
5143 }
5144 
5145 static int gfx_v8_0_set_powergating_state(void *handle,
5146 					  enum amd_powergating_state state)
5147 {
5148 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5149 
5150 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5151 		return 0;
5152 
5153 	switch (adev->asic_type) {
5154 	case CHIP_POLARIS11:
5155 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5156 			polaris11_enable_gfx_static_mg_power_gating(adev,
5157 					state == AMD_PG_STATE_GATE ? true : false);
5158 		else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5159 			polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5160 					state == AMD_PG_STATE_GATE ? true : false);
5161 		else
5162 			polaris11_enable_gfx_quick_mg_power_gating(adev,
5163 					state == AMD_PG_STATE_GATE ? true : false);
5164 		break;
5165 	default:
5166 		break;
5167 	}
5168 
5169 	return 0;
5170 }
5171 
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM command over the RLC serdes
 * @adev: amdgpu device pointer
 * @reg_addr: serdes register to target
 * @cmd: BPM command value
 *
 * Broadcasts to all SEs/SHs and all CU/non-CU masters, then builds the
 * serdes write control word and writes it out.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	/* NOTE(review): unlike the generic branch below, the Stoney mask
	 * list omits BPM_DATA and REG_ADDR from the clear — confirm this
	 * is intentional for that ASIC */
	if (adev->asic_type == CHIP_STONEY)
			data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* fill in the command, target register and broadcast BPM address */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5212 
5213 #define MSG_ENTER_RLC_SAFE_MODE     1
5214 #define MSG_EXIT_RLC_SAFE_MODE      0
5215 
5216 #define RLC_GPR_REG2__REQ_MASK           0x00000001
5217 #define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
5218 #define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5219 
/*
 * cz_enter_rlc_safe_mode - request RLC safe mode via the RLC_GPR_REG2
 * message interface
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is halted or no relevant CG/PG feature is
 * enabled.  The polls below time out silently; in_safe_mode is set even
 * if they expired.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* send the ENTER message with the REQ bit set */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for gfx clocks and power to report on */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge by clearing REQ */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5255 
/*
 * cz_exit_rlc_safe_mode - leave RLC safe mode via the RLC_GPR_REG2
 * message interface
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is halted.  Note the acknowledge poll at
 * the bottom runs even when no EXIT message was sent — presumably
 * harmless since REQ would already be clear; confirm if refactoring.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* send the EXIT message with the REQ bit set */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to acknowledge by clearing REQ */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
			break;
		udelay(1);
	}
}
5281 
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode via RLC_SAFE_MODE
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is halted or neither CGCG nor MGCG is
 * enabled.  NOTE(review): 'data' is seeded from RLC_CNTL but written to
 * RLC_SAFE_MODE — confirm the extra RLC_CNTL bits are intended to be
 * carried over.  The polls time out silently.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* message 1 = enter safe mode, with CMD set */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for gfx clocks and power to report on */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge by clearing CMD */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5315 
/*
 * iceland_exit_rlc_safe_mode - leave RLC safe mode via RLC_SAFE_MODE
 * @adev: amdgpu device pointer
 *
 * Sends the exit request (message 0) only if safe mode was entered.
 * The acknowledge poll at the bottom runs unconditionally and times
 * out silently.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* message 0 = exit safe mode, with CMD set */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to acknowledge by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
			break;
		udelay(1);
	}
}
5340 
/* No-op enter hook for ASICs needing no safe-mode handshake; only tracks state. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5345 
/* No-op exit hook for ASICs needing no safe-mode handshake; only tracks state. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5350 
/* Safe-mode hooks using the RLC_GPR_REG2 message interface */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};

/* Safe-mode hooks using the RLC_SAFE_MODE register */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

/* Stub hooks for ASICs that require no safe-mode handshake */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5365 
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium grain clock gating
 *
 * Programs the RLC/CP light-sleep bits, the MGCG override register and
 * the CGTS tree-shade controls under RLC safe mode.  The numbered
 * comments mark the required programming order; register writes are
 * skipped when the value is unchanged.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				/* 1 - RLC memory Light sleep */
				temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmRLC_MEM_SLP_CNTL, data);
			}

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				/* 2 - CP memory Light sleep */
				temp = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE (APUs keep the GRBM override set) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5478 
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse grain clock gating
 *
 * Programs the CGCG/CGLS enable bits and the matching override bits
 * under RLC safe mode, interleaving serdes commands and idle waits in
 * the documented order.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/*
 * gfx_v8_0_update_gfx_clock_gating - toggle all gfx clock gating features
 * @adev: amdgpu device pointer
 * @enable: true to gate, false to ungate
 *
 * The enable and disable orders are deliberately mirrored: CGCG/CGLS
 * must be enabled after, and disabled before, MGCG/MGLS/TS.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
5585 
5586 static int gfx_v8_0_set_clockgating_state(void *handle,
5587 					  enum amd_clockgating_state state)
5588 {
5589 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5590 
5591 	switch (adev->asic_type) {
5592 	case CHIP_FIJI:
5593 	case CHIP_CARRIZO:
5594 	case CHIP_STONEY:
5595 		gfx_v8_0_update_gfx_clock_gating(adev,
5596 						 state == AMD_CG_STATE_GATE ? true : false);
5597 		break;
5598 	default:
5599 		break;
5600 	}
5601 	return 0;
5602 }
5603 
5604 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5605 {
5606 	u32 rptr;
5607 
5608 	rptr = ring->adev->wb.wb[ring->rptr_offs];
5609 
5610 	return rptr;
5611 }
5612 
5613 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5614 {
5615 	struct amdgpu_device *adev = ring->adev;
5616 	u32 wptr;
5617 
5618 	if (ring->use_doorbell)
5619 		/* XXX check if swapping is necessary on BE */
5620 		wptr = ring->adev->wb.wb[ring->wptr_offs];
5621 	else
5622 		wptr = RREG32(mmCP_RB0_WPTR);
5623 
5624 	return wptr;
5625 }
5626 
5627 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5628 {
5629 	struct amdgpu_device *adev = ring->adev;
5630 
5631 	if (ring->use_doorbell) {
5632 		/* XXX check if swapping is necessary on BE */
5633 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
5634 		WDOORBELL32(ring->doorbell_index, ring->wptr);
5635 	} else {
5636 		WREG32(mmCP_RB0_WPTR, ring->wptr);
5637 		(void)RREG32(mmCP_RB0_WPTR);
5638 	}
5639 }
5640 
/**
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush request on the ring
 * @ring: ring to emit on
 *
 * Emits a WAIT_REG_MEM packet that requests an HDP flush via
 * GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until this client's
 * bit is set.  Compute rings pick their done-bit from ME/pipe; the gfx
 * ring uses the CP0 bit and waits on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* done-bits for MEC1 start at CP2, for MEC2 at CP6,
		 * one bit per pipe
		 */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no other MEs exist; nothing to flush for them */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask); /* reference value */
	amdgpu_ring_write(ring, ref_and_mask); /* compare mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
5672 
/* Invalidate the HDP read cache by writing 1 to HDP_DEBUG0 through a
 * confirmed WRITE_DATA packet on the ME engine.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0); /* destination register */
	amdgpu_ring_write(ring, 0);            /* high bits of register dst */
	amdgpu_ring_write(ring, 1);            /* value to write */

}
5684 
/**
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 * @ring: ring to emit on
 * @ib: indirect buffer to execute
 * @vm_id: VMID to run the IB under (packed into bits 31:24 of control)
 * @ctx_switch: true when this is the first IB of a new ring frame
 *
 * First publishes the post-frame rptr into the ring's next_rptr
 * writeback slot, optionally emits a SWITCH_BUFFER on a context switch,
 * then emits the INDIRECT_BUFFER packet (the _CONST variant for CE IBs).
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;
	/* 5 dwords for the WRITE_DATA packet emitted below */
	u32 next_rptr = ring->wptr + 5;

	/* +2 dwords for the SWITCH_BUFFER packet, when emitted */
	if (ctx_switch)
		next_rptr += 2;

	/* +4 dwords for the INDIRECT_BUFFER packet itself */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* constant-engine IBs use the _CONST opcode */
	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5724 
/**
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a compute ring
 * @ring: compute ring to emit on
 * @ib: indirect buffer to execute
 * @vm_id: VMID to run the IB under (packed into bits 31:24 of control)
 * @ctx_switch: unused on compute rings (no ring-frame context switch)
 *
 * Publishes the post-frame rptr into the next_rptr writeback slot, then
 * emits an INDIRECT_BUFFER packet with INDIRECT_BUFFER_VALID set.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;
	/* 5 dwords for the WRITE_DATA packet emitted below */
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* +4 dwords for the INDIRECT_BUFFER packet itself */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5754 
/**
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 * @ring: ring to emit on
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT for a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT to raise an interrupt on completion
 *
 * Uses EVENT_WRITE_EOP with TC/TCL1 cache flush actions so caches are
 * flushed before the fence value lands.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL: 2 = send 64-bit value, 1 = 32-bit; INT_SEL: 2 = irq */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
5775 
/**
 * gfx_v8_0_ring_emit_pipeline_sync - wait for earlier work on this ring
 * @ring: ring to emit on
 *
 * Emits a WAIT_REG_MEM that polls this ring's own fence memory until it
 * equals the latest emitted sequence number, i.e. all previously
 * submitted work has signalled.  On the gfx ring the wait runs on the
 * PFP and is followed by SWITCH_BUFFER packets.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);        /* reference value */
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
5800 
/**
 * gfx_v8_0_ring_emit_vm_flush - flush the TLB for a VM context
 * @ring: ring to emit on
 * @vm_id: VM context id (0-15) to flush
 * @pd_addr: new page directory base address
 *
 * Updates the page table base register for @vm_id, requests a TLB
 * invalidate for that context, then waits for the request register to
 * read back before continuing.  On the gfx ring the PFP is re-synced
 * with the ME afterwards.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12); /* register holds a page-frame number */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
5851 
5852 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5853 {
5854 	return ring->adev->wb.wb[ring->rptr_offs];
5855 }
5856 
5857 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5858 {
5859 	return ring->adev->wb.wb[ring->wptr_offs];
5860 }
5861 
/* Publish a compute ring's write pointer: mirror it into the writeback
 * slot first, then ring the doorbell to notify the CP.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
5870 
/**
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 * @ring: compute ring to emit on
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT for a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT to raise an interrupt on completion
 *
 * Compute rings use the RELEASE_MEM packet (instead of the gfx ring's
 * EVENT_WRITE_EOP) with the same TC/TCL1 cache flush actions.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL: 2 = send 64-bit value, 1 = 32-bit; INT_SEL: 2 = irq */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
5891 
5892 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5893 						 enum amdgpu_interrupt_state state)
5894 {
5895 	u32 cp_int_cntl;
5896 
5897 	switch (state) {
5898 	case AMDGPU_IRQ_STATE_DISABLE:
5899 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5900 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5901 					    TIME_STAMP_INT_ENABLE, 0);
5902 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5903 		break;
5904 	case AMDGPU_IRQ_STATE_ENABLE:
5905 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5906 		cp_int_cntl =
5907 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5908 				      TIME_STAMP_INT_ENABLE, 1);
5909 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5910 		break;
5911 	default:
5912 		break;
5913 	}
5914 }
5915 
5916 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5917 						     int me, int pipe,
5918 						     enum amdgpu_interrupt_state state)
5919 {
5920 	u32 mec_int_cntl, mec_int_cntl_reg;
5921 
5922 	/*
5923 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
5924 	 * handles the setting of interrupts for this specific pipe. All other
5925 	 * pipes' interrupts are set by amdkfd.
5926 	 */
5927 
5928 	if (me == 1) {
5929 		switch (pipe) {
5930 		case 0:
5931 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5932 			break;
5933 		default:
5934 			DRM_DEBUG("invalid pipe %d\n", pipe);
5935 			return;
5936 		}
5937 	} else {
5938 		DRM_DEBUG("invalid me %d\n", me);
5939 		return;
5940 	}
5941 
5942 	switch (state) {
5943 	case AMDGPU_IRQ_STATE_DISABLE:
5944 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5945 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5946 					     TIME_STAMP_INT_ENABLE, 0);
5947 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5948 		break;
5949 	case AMDGPU_IRQ_STATE_ENABLE:
5950 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5951 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5952 					     TIME_STAMP_INT_ENABLE, 1);
5953 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5954 		break;
5955 	default:
5956 		break;
5957 	}
5958 }
5959 
5960 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5961 					     struct amdgpu_irq_src *source,
5962 					     unsigned type,
5963 					     enum amdgpu_interrupt_state state)
5964 {
5965 	u32 cp_int_cntl;
5966 
5967 	switch (state) {
5968 	case AMDGPU_IRQ_STATE_DISABLE:
5969 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5970 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5971 					    PRIV_REG_INT_ENABLE, 0);
5972 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5973 		break;
5974 	case AMDGPU_IRQ_STATE_ENABLE:
5975 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5976 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5977 					    PRIV_REG_INT_ENABLE, 1);
5978 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5979 		break;
5980 	default:
5981 		break;
5982 	}
5983 
5984 	return 0;
5985 }
5986 
5987 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5988 					      struct amdgpu_irq_src *source,
5989 					      unsigned type,
5990 					      enum amdgpu_interrupt_state state)
5991 {
5992 	u32 cp_int_cntl;
5993 
5994 	switch (state) {
5995 	case AMDGPU_IRQ_STATE_DISABLE:
5996 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5997 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5998 					    PRIV_INSTR_INT_ENABLE, 0);
5999 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6000 		break;
6001 	case AMDGPU_IRQ_STATE_ENABLE:
6002 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6003 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6004 					    PRIV_INSTR_INT_ENABLE, 1);
6005 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6006 		break;
6007 	default:
6008 		break;
6009 	}
6010 
6011 	return 0;
6012 }
6013 
/* Route an EOP interrupt enable/disable request to the gfx ring or to
 * the matching MEC/pipe pair.  Unknown types are ignored; always
 * returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6052 
6053 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6054 			    struct amdgpu_irq_src *source,
6055 			    struct amdgpu_iv_entry *entry)
6056 {
6057 	int i;
6058 	u8 me_id, pipe_id, queue_id;
6059 	struct amdgpu_ring *ring;
6060 
6061 	DRM_DEBUG("IH: CP EOP\n");
6062 	me_id = (entry->ring_id & 0x0c) >> 2;
6063 	pipe_id = (entry->ring_id & 0x03) >> 0;
6064 	queue_id = (entry->ring_id & 0x70) >> 4;
6065 
6066 	switch (me_id) {
6067 	case 0:
6068 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6069 		break;
6070 	case 1:
6071 	case 2:
6072 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6073 			ring = &adev->gfx.compute_ring[i];
6074 			/* Per-queue interrupt is supported for MEC starting from VI.
6075 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6076 			  */
6077 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6078 				amdgpu_fence_process(ring);
6079 		}
6080 		break;
6081 	}
6082 	return 0;
6083 }
6084 
/* Privileged-register-fault handler: log the offense and schedule a GPU
 * reset via the device's reset work item.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6093 
/* Privileged-instruction-fault handler: log the offense and schedule a
 * GPU reset via the device's reset work item.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6102 
/* IP-block entry points (lifecycle, idle/reset, clock and power gating)
 * for the GFX v8 block, registered with the amdgpu IP framework.
 */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6119 
/* Ring callbacks for the single gfx ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6137 
/* Ring callbacks for the compute (MEC) rings; these differ from the gfx
 * ring in the IB and fence emitters and use doorbell-based pointers.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6155 
6156 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6157 {
6158 	int i;
6159 
6160 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6161 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6162 
6163 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6164 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6165 }
6166 
/* End-of-pipe interrupt source: state setter + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6171 
/* Privileged-register-fault interrupt source: state setter + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6176 
/* Privileged-instruction-fault interrupt source: state setter + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6181 
/* Register the GFX interrupt sources (EOP and the two fault sources)
 * with the amdgpu IRQ framework.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one EOP type per gfx ring / MEC pipe */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6193 
/* Select the per-ASIC RLC (safe mode enter/exit) callback table. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_STONEY:
		/* NOTE(review): STONEY shares the Iceland RLC callbacks here
		 * rather than the Carrizo ones — confirm this is intentional.
		 */
		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
		break;
	case CHIP_CARRIZO:
		adev->gfx.rlc.funcs = &cz_rlc_funcs;
		break;
	default:
		/* other VI parts get no-op safe-mode handlers */
		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
		break;
	}
}
6209 
6210 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6211 {
6212 	/* init asci gds info */
6213 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6214 	adev->gds.gws.total_size = 64;
6215 	adev->gds.oa.total_size = 16;
6216 
6217 	if (adev->gds.mem.total_size == 64 * 1024) {
6218 		adev->gds.mem.gfx_partition_size = 4096;
6219 		adev->gds.mem.cs_partition_size = 4096;
6220 
6221 		adev->gds.gws.gfx_partition_size = 4;
6222 		adev->gds.gws.cs_partition_size = 4;
6223 
6224 		adev->gds.oa.gfx_partition_size = 4;
6225 		adev->gds.oa.cs_partition_size = 1;
6226 	} else {
6227 		adev->gds.mem.gfx_partition_size = 1024;
6228 		adev->gds.mem.cs_partition_size = 1024;
6229 
6230 		adev->gds.gws.gfx_partition_size = 16;
6231 		adev->gds.gws.cs_partition_size = 16;
6232 
6233 		adev->gds.oa.gfx_partition_size = 4;
6234 		adev->gds.oa.cs_partition_size = 4;
6235 	}
6236 }
6237 
6238 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6239 {
6240 	u32 data, mask;
6241 
6242 	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6243 	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6244 
6245 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6246 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6247 
6248 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6249 
6250 	return (~data) & mask;
6251 }
6252 
/**
 * gfx_v8_0_get_cu_info - gather compute-unit topology
 * @adev: amdgpu device
 *
 * Walks every shader engine / shader array, records the active-CU
 * bitmap for each, counts active CUs, and builds the "always on" CU
 * mask (up to the first two active CUs per SH).  Results are stored in
 * adev->gfx.cu_info.  Takes grbm_idx_mutex because it reprograms the
 * GRBM SE/SH selection and restores broadcast mode before returning.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* select this SE/SH so the CU registers read below apply to it */
			gfx_v8_0_select_se_sh(adev, i, j);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first two per SH become "always on" */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast (all SE/SH) selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6289