xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 6dfcd296)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33 
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36 
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39 
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42 
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47 
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50 
51 #include "smu/smu_7_1_3_d.h"
52 
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55 
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60 
61 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70 
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77 
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81 
/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* Sentinel: number of BPM registers above */
};
91 
92 #define RLC_FormatDirectRegListLength        14
93 
/*
 * Firmware images required by the GFX8 block, one set per supported VI
 * ASIC (CE, PFP, ME, MEC and RLC microcode; MEC2 where the ASIC has a
 * second compute microengine).  Listing them here lets tooling bundle
 * the files into the initramfs.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140 
/* Per-VMID GDS register offsets: {base, size, GWS, OA} for VMIDs 0..15. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
160 
/*
 * Tonga A11 golden register settings, laid out as {reg, mask, value}
 * triples and applied via amdgpu_program_register_sequence().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
179 
/* Tonga common golden settings ({reg, mask, value} triples). */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
191 
/*
 * Tonga MGCG/CGCG clockgating init sequence ({reg, mask, value} triples).
 * GRBM_GFX_INDEX is first broadcast to all SEs/SHs, then the per-CU
 * CGTS registers are programmed.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
270 
/* Polaris11 A11 golden register settings ({reg, mask, value} triples). */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
291 
/* Polaris11 common golden settings ({reg, mask, value} triples). */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
301 
/* Polaris10 A11 golden register settings ({reg, mask, value} triples). */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
322 
/* Polaris10 common golden settings ({reg, mask, value} triples). */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
334 
/* Fiji common golden settings ({reg, mask, value} triples). */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
348 
/* Fiji A10 golden register settings ({reg, mask, value} triples). */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
363 
/* Fiji MGCG/CGCG clockgating init sequence ({reg, mask, value} triples). */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
402 
/* Iceland (Topaz) A11 golden register settings ({reg, mask, value} triples). */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
422 
/* Iceland (Topaz) common golden settings ({reg, mask, value} triples). */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
434 
/*
 * Iceland (Topaz) MGCG/CGCG clockgating init sequence ({reg, mask, value}
 * triples).  Only six CUs are listed here, versus eight on Tonga/Carrizo.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
502 
/* Carrizo A11 golden register settings ({reg, mask, value} triples). */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
518 
/* Carrizo common golden settings ({reg, mask, value} triples). */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
530 
/* Carrizo MGCG/CGCG clockgating init sequence ({reg, mask, value} triples). */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
609 
/* Stoney A11 golden register settings ({reg, mask, value} triples). */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
623 
/* Stoney common golden settings ({reg, mask, value} triples). */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
635 
/* Stoney MGCG/CGCG clockgating init sequence ({reg, mask, value} triples). */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
645 
/* Forward declarations for setup helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
652 
653 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
654 {
655 	switch (adev->asic_type) {
656 	case CHIP_TOPAZ:
657 		amdgpu_program_register_sequence(adev,
658 						 iceland_mgcg_cgcg_init,
659 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
660 		amdgpu_program_register_sequence(adev,
661 						 golden_settings_iceland_a11,
662 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
663 		amdgpu_program_register_sequence(adev,
664 						 iceland_golden_common_all,
665 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
666 		break;
667 	case CHIP_FIJI:
668 		amdgpu_program_register_sequence(adev,
669 						 fiji_mgcg_cgcg_init,
670 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
671 		amdgpu_program_register_sequence(adev,
672 						 golden_settings_fiji_a10,
673 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
674 		amdgpu_program_register_sequence(adev,
675 						 fiji_golden_common_all,
676 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
677 		break;
678 
679 	case CHIP_TONGA:
680 		amdgpu_program_register_sequence(adev,
681 						 tonga_mgcg_cgcg_init,
682 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
683 		amdgpu_program_register_sequence(adev,
684 						 golden_settings_tonga_a11,
685 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
686 		amdgpu_program_register_sequence(adev,
687 						 tonga_golden_common_all,
688 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
689 		break;
690 	case CHIP_POLARIS11:
691 		amdgpu_program_register_sequence(adev,
692 						 golden_settings_polaris11_a11,
693 						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
694 		amdgpu_program_register_sequence(adev,
695 						 polaris11_golden_common_all,
696 						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
697 		break;
698 	case CHIP_POLARIS10:
699 		amdgpu_program_register_sequence(adev,
700 						 golden_settings_polaris10_a11,
701 						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
702 		amdgpu_program_register_sequence(adev,
703 						 polaris10_golden_common_all,
704 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
705 		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
706 		if (adev->pdev->revision == 0xc7 &&
707 		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
708 		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
709 		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
710 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
711 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
712 		}
713 		break;
714 	case CHIP_CARRIZO:
715 		amdgpu_program_register_sequence(adev,
716 						 cz_mgcg_cgcg_init,
717 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
718 		amdgpu_program_register_sequence(adev,
719 						 cz_golden_settings_a11,
720 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
721 		amdgpu_program_register_sequence(adev,
722 						 cz_golden_common_all,
723 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
724 		break;
725 	case CHIP_STONEY:
726 		amdgpu_program_register_sequence(adev,
727 						 stoney_mgcg_cgcg_init,
728 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
729 		amdgpu_program_register_sequence(adev,
730 						 stoney_golden_settings_a11,
731 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
732 		amdgpu_program_register_sequence(adev,
733 						 stoney_golden_common_all,
734 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
735 		break;
736 	default:
737 		break;
738 	}
739 }
740 
741 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
742 {
743 	int i;
744 
745 	adev->gfx.scratch.num_reg = 7;
746 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
747 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
748 		adev->gfx.scratch.free[i] = true;
749 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
750 	}
751 }
752 
/**
 * gfx_v8_0_ring_test_ring - basic sanity test of a GFX/compute ring
 * @ring: the ring to exercise
 *
 * Allocates a scratch register, seeds it with 0xCAFEDEAD, then submits
 * a 3-dword SET_UCONFIG_REG packet on the ring that writes 0xDEADBEEF
 * to that same register.  The register is then polled until the value
 * shows up or adev->usec_timeout microseconds elapse, which proves the
 * CP is fetching and executing packets from the ring.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg with a value the packet must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* 3 dwords: packet header, register offset, value to write */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait (1us steps) for the CP to execute the write */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
796 
/**
 * gfx_v8_0_ring_test_ib - sanity test of indirect buffer execution
 * @ring: the ring to submit on
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Same idea as gfx_v8_0_ring_test_ring(), but the SET_UCONFIG_REG
 * write of 0xDEADBEEF is placed in an indirect buffer that is
 * scheduled on the ring, and completion is detected via the submission
 * fence rather than by polling.  Verifies the whole IB path
 * (allocation, scheduling, fence signalling, execution).
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals,
 * -EINVAL if the scratch value is wrong, or another negative errno.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* seed the scratch reg; the IB must overwrite this */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* packet header, register offset, value to write */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	/* fence_wait_timeout: 0 = timed out, <0 = error, >0 = signalled */
	r = fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
852 
853 
854 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
855 	release_firmware(adev->gfx.pfp_fw);
856 	adev->gfx.pfp_fw = NULL;
857 	release_firmware(adev->gfx.me_fw);
858 	adev->gfx.me_fw = NULL;
859 	release_firmware(adev->gfx.ce_fw);
860 	adev->gfx.ce_fw = NULL;
861 	release_firmware(adev->gfx.rlc_fw);
862 	adev->gfx.rlc_fw = NULL;
863 	release_firmware(adev->gfx.mec_fw);
864 	adev->gfx.mec_fw = NULL;
865 	if ((adev->asic_type != CHIP_STONEY) &&
866 	    (adev->asic_type != CHIP_TOPAZ))
867 		release_firmware(adev->gfx.mec2_fw);
868 	adev->gfx.mec2_fw = NULL;
869 
870 	kfree(adev->gfx.rlc.register_list_format);
871 }
872 
873 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
874 {
875 	const char *chip_name;
876 	char fw_name[30];
877 	int err;
878 	struct amdgpu_firmware_info *info = NULL;
879 	const struct common_firmware_header *header = NULL;
880 	const struct gfx_firmware_header_v1_0 *cp_hdr;
881 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
882 	unsigned int *tmp = NULL, i;
883 
884 	DRM_DEBUG("\n");
885 
886 	switch (adev->asic_type) {
887 	case CHIP_TOPAZ:
888 		chip_name = "topaz";
889 		break;
890 	case CHIP_TONGA:
891 		chip_name = "tonga";
892 		break;
893 	case CHIP_CARRIZO:
894 		chip_name = "carrizo";
895 		break;
896 	case CHIP_FIJI:
897 		chip_name = "fiji";
898 		break;
899 	case CHIP_POLARIS11:
900 		chip_name = "polaris11";
901 		break;
902 	case CHIP_POLARIS10:
903 		chip_name = "polaris10";
904 		break;
905 	case CHIP_STONEY:
906 		chip_name = "stoney";
907 		break;
908 	default:
909 		BUG();
910 	}
911 
912 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
913 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
914 	if (err)
915 		goto out;
916 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
917 	if (err)
918 		goto out;
919 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
920 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
921 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
922 
923 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
924 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
925 	if (err)
926 		goto out;
927 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
928 	if (err)
929 		goto out;
930 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
931 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
933 
934 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
935 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
936 	if (err)
937 		goto out;
938 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
939 	if (err)
940 		goto out;
941 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
942 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
943 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
944 
945 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
946 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
947 	if (err)
948 		goto out;
949 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
950 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
951 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
952 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
953 
954 	adev->gfx.rlc.save_and_restore_offset =
955 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
956 	adev->gfx.rlc.clear_state_descriptor_offset =
957 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
958 	adev->gfx.rlc.avail_scratch_ram_locations =
959 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
960 	adev->gfx.rlc.reg_restore_list_size =
961 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
962 	adev->gfx.rlc.reg_list_format_start =
963 			le32_to_cpu(rlc_hdr->reg_list_format_start);
964 	adev->gfx.rlc.reg_list_format_separate_start =
965 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
966 	adev->gfx.rlc.starting_offsets_start =
967 			le32_to_cpu(rlc_hdr->starting_offsets_start);
968 	adev->gfx.rlc.reg_list_format_size_bytes =
969 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
970 	adev->gfx.rlc.reg_list_size_bytes =
971 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
972 
973 	adev->gfx.rlc.register_list_format =
974 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
975 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
976 
977 	if (!adev->gfx.rlc.register_list_format) {
978 		err = -ENOMEM;
979 		goto out;
980 	}
981 
982 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
983 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
984 	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
985 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
986 
987 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
988 
989 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
990 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
991 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
992 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
993 
994 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
995 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
996 	if (err)
997 		goto out;
998 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
999 	if (err)
1000 		goto out;
1001 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1002 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1003 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1004 
1005 	if ((adev->asic_type != CHIP_STONEY) &&
1006 	    (adev->asic_type != CHIP_TOPAZ)) {
1007 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1008 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1009 		if (!err) {
1010 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1011 			if (err)
1012 				goto out;
1013 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1014 				adev->gfx.mec2_fw->data;
1015 			adev->gfx.mec2_fw_version =
1016 				le32_to_cpu(cp_hdr->header.ucode_version);
1017 			adev->gfx.mec2_feature_version =
1018 				le32_to_cpu(cp_hdr->ucode_feature_version);
1019 		} else {
1020 			err = 0;
1021 			adev->gfx.mec2_fw = NULL;
1022 		}
1023 	}
1024 
1025 	if (adev->firmware.smu_load) {
1026 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1027 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1028 		info->fw = adev->gfx.pfp_fw;
1029 		header = (const struct common_firmware_header *)info->fw->data;
1030 		adev->firmware.fw_size +=
1031 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1032 
1033 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1034 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1035 		info->fw = adev->gfx.me_fw;
1036 		header = (const struct common_firmware_header *)info->fw->data;
1037 		adev->firmware.fw_size +=
1038 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1039 
1040 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1041 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1042 		info->fw = adev->gfx.ce_fw;
1043 		header = (const struct common_firmware_header *)info->fw->data;
1044 		adev->firmware.fw_size +=
1045 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1046 
1047 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1048 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1049 		info->fw = adev->gfx.rlc_fw;
1050 		header = (const struct common_firmware_header *)info->fw->data;
1051 		adev->firmware.fw_size +=
1052 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053 
1054 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1055 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1056 		info->fw = adev->gfx.mec_fw;
1057 		header = (const struct common_firmware_header *)info->fw->data;
1058 		adev->firmware.fw_size +=
1059 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060 
1061 		if (adev->gfx.mec2_fw) {
1062 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1063 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1064 			info->fw = adev->gfx.mec2_fw;
1065 			header = (const struct common_firmware_header *)info->fw->data;
1066 			adev->firmware.fw_size +=
1067 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1068 		}
1069 
1070 	}
1071 
1072 out:
1073 	if (err) {
1074 		dev_err(adev->dev,
1075 			"gfx8: Failed to load firmware \"%s\"\n",
1076 			fw_name);
1077 		release_firmware(adev->gfx.pfp_fw);
1078 		adev->gfx.pfp_fw = NULL;
1079 		release_firmware(adev->gfx.me_fw);
1080 		adev->gfx.me_fw = NULL;
1081 		release_firmware(adev->gfx.ce_fw);
1082 		adev->gfx.ce_fw = NULL;
1083 		release_firmware(adev->gfx.rlc_fw);
1084 		adev->gfx.rlc_fw = NULL;
1085 		release_firmware(adev->gfx.mec_fw);
1086 		adev->gfx.mec_fw = NULL;
1087 		release_firmware(adev->gfx.mec2_fw);
1088 		adev->gfx.mec2_fw = NULL;
1089 	}
1090 	return err;
1091 }
1092 
/**
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state indirect buffer
 * @adev: amdgpu device
 * @buffer: CPU-mapped destination (the clear-state BO)
 *
 * Emits the clear-state command stream: a PREAMBLE begin marker, a
 * CONTEXT_CONTROL packet, every SECT_CONTEXT extent from
 * adev->gfx.rlc.cs_data as SET_CONTEXT_REG packets, a per-ASIC
 * PA_SC_RASTER_CONFIG pair, a PREAMBLE end marker and a final
 * CLEAR_STATE packet.  All dwords are stored little-endian.  Returns
 * early (leaving a truncated buffer) if a non-context section is seen.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* copy every context-register extent into SET_CONTEXT_REG packets */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1165 
/**
 * cz_init_cp_jump_table - copy CP jump tables into the RLC cp-table BO
 * @adev: amdgpu device
 *
 * Packs the jump table of each CP microengine back-to-back into the
 * CPU-mapped cp-table buffer (adev->gfx.rlc.cp_table_ptr).  Engines are
 * visited in order: me 0 = CE, 1 = PFP, 2 = ME, 3 = MEC, and on
 * Carrizo only, 4 = MEC2.  Each firmware header supplies the table's
 * offset and size (in dwords) within the ucode image.
 *
 * NOTE(review): fw_data/table_offset/table_size stay uninitialized if
 * max_me were ever raised past 5 without adding a branch — keep the
 * if/else chain in sync with max_me.
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Carrizo has a MEC2 engine as well */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		/* append this engine's table right after the previous one */
		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
1230 
/**
 * gfx_v8_0_rlc_fini - tear down the RLC buffer objects
 * @adev: amdgpu device
 *
 * Unpins and frees the clear-state BO and the CP jump-table BO created
 * by gfx_v8_0_rlc_init().  Safe to call when either object was never
 * allocated.  A failed reserve is only warned about; teardown proceeds
 * regardless so the references are still dropped.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}
1257 
/**
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffer objects
 * @adev: amdgpu device
 *
 * Creates, pins (in VRAM) and fills two buffers used by the RLC:
 * - the clear-state buffer, sized by gfx_v8_0_get_csb_size() and
 *   written via gfx_v8_0_get_csb_buffer();
 * - on Carrizo/Stoney only, the CP jump-table buffer, filled by
 *   cz_init_cp_jump_table().
 * Each buffer follows the create -> reserve -> pin -> kmap -> write ->
 * kunmap -> unreserve sequence.  On failure of the clear-state path the
 * partially-created state is cleaned up via gfx_v8_0_rlc_fini().
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* pin in VRAM and record the GPU address for the RLC */
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
1353 
/**
 * gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object
 * @adev: amdgpu device
 *
 * Unpins, unreferences and NULLs the HPD EOP BO created by
 * gfx_v8_0_mec_init().  No-op if the BO was never allocated; a failed
 * reserve is only warned about so teardown still completes.
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}
1368 
1369 #define MEC_HPD_SIZE 2048
1370 
/**
 * gfx_v8_0_mec_init - allocate and zero the MEC HPD EOP buffer
 * @adev: amdgpu device
 *
 * Sets the MEC/pipe/queue topology (one MEC, one pipe, eight queues —
 * the remaining pipes are owned by KFD) and creates a GTT buffer of
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes for the hardware queue
 * descriptors / EOP data, pins it, and zeroes it through a CPU mapping.
 * Any failure after creation is unwound via gfx_v8_0_mec_fini().
 * (NOTE(review): the "HDP EOP" wording in the messages looks like a
 * typo for "HPD EOP" — left as-is here.)
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	/* zero the whole buffer before handing it to the hardware */
	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1422 
/*
 * Hand-assembled GCN compute shader used by the Carrizo EDC GPR
 * workaround (gfx_v8_0_do_edc_gpr_workarounds) to initialize VGPRs.
 * Raw machine-code dwords; do not edit without reassembling.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1459 
/*
 * Hand-assembled GCN compute shader used by the Carrizo EDC GPR
 * workaround to initialize SGPRs (dispatched twice, once per SE half —
 * see sgpr1_init_regs/sgpr2_init_regs).  Raw machine-code dwords.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1484 
/*
 * Register/value pairs (consumed two at a time as SET_SH_REG packets)
 * configuring the compute dispatch that runs the VGPR init shader:
 * all CUs on SE0, 256*4 threads in X, and sentinel USER_DATA values.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1504 
/*
 * Register/value pairs for the first SGPR-init dispatch: CU mask 0x0f
 * on SE0 (the other half is covered by sgpr2_init_regs), 256*5 threads
 * in X.  Consumed two at a time as SET_SH_REG packets.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1524 
/*
 * Register/value pairs for the second SGPR-init dispatch: CU mask 0xf0
 * on SE0, complementing sgpr1_init_regs; otherwise identical setup.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1544 
/*
 * EDC (single-error-correct / double-error-detect) counter registers
 * across the CP, GDS, SPI, SQ, TC and TD blocks — presumably read back
 * to reset/collect the counts by the EDC workaround code (verify
 * against the caller, which is outside this view).
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1573 
1574 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1575 {
1576 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1577 	struct amdgpu_ib ib;
1578 	struct fence *f = NULL;
1579 	int r, i;
1580 	u32 tmp;
1581 	unsigned total_size, vgpr_offset, sgpr_offset;
1582 	u64 gpu_addr;
1583 
1584 	/* only supported on CZ */
1585 	if (adev->asic_type != CHIP_CARRIZO)
1586 		return 0;
1587 
1588 	/* bail if the compute ring is not ready */
1589 	if (!ring->ready)
1590 		return 0;
1591 
1592 	tmp = RREG32(mmGB_EDC_MODE);
1593 	WREG32(mmGB_EDC_MODE, 0);
1594 
1595 	total_size =
1596 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1597 	total_size +=
1598 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1599 	total_size +=
1600 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1601 	total_size = ALIGN(total_size, 256);
1602 	vgpr_offset = total_size;
1603 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1604 	sgpr_offset = total_size;
1605 	total_size += sizeof(sgpr_init_compute_shader);
1606 
1607 	/* allocate an indirect buffer to put the commands in */
1608 	memset(&ib, 0, sizeof(ib));
1609 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1610 	if (r) {
1611 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1612 		return r;
1613 	}
1614 
1615 	/* load the compute shaders */
1616 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1617 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1618 
1619 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1620 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1621 
1622 	/* init the ib length to 0 */
1623 	ib.length_dw = 0;
1624 
1625 	/* VGPR */
1626 	/* write the register state for the compute dispatch */
1627 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1628 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1629 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1630 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1631 	}
1632 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1633 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1634 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1635 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1636 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1637 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1638 
1639 	/* write dispatch packet */
1640 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1641 	ib.ptr[ib.length_dw++] = 8; /* x */
1642 	ib.ptr[ib.length_dw++] = 1; /* y */
1643 	ib.ptr[ib.length_dw++] = 1; /* z */
1644 	ib.ptr[ib.length_dw++] =
1645 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1646 
1647 	/* write CS partial flush packet */
1648 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1649 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1650 
1651 	/* SGPR1 */
1652 	/* write the register state for the compute dispatch */
1653 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1654 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1655 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1656 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1657 	}
1658 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1660 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1664 
1665 	/* write dispatch packet */
1666 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667 	ib.ptr[ib.length_dw++] = 8; /* x */
1668 	ib.ptr[ib.length_dw++] = 1; /* y */
1669 	ib.ptr[ib.length_dw++] = 1; /* z */
1670 	ib.ptr[ib.length_dw++] =
1671 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1672 
1673 	/* write CS partial flush packet */
1674 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1676 
1677 	/* SGPR2 */
1678 	/* write the register state for the compute dispatch */
1679 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1680 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1681 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1682 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1683 	}
1684 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1685 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1686 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1687 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1688 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1689 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1690 
1691 	/* write dispatch packet */
1692 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1693 	ib.ptr[ib.length_dw++] = 8; /* x */
1694 	ib.ptr[ib.length_dw++] = 1; /* y */
1695 	ib.ptr[ib.length_dw++] = 1; /* z */
1696 	ib.ptr[ib.length_dw++] =
1697 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1698 
1699 	/* write CS partial flush packet */
1700 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1701 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1702 
1703 	/* shedule the ib on the ring */
1704 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1705 	if (r) {
1706 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1707 		goto fail;
1708 	}
1709 
1710 	/* wait for the GPU to finish processing the IB */
1711 	r = fence_wait(f, false);
1712 	if (r) {
1713 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1714 		goto fail;
1715 	}
1716 
1717 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1718 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1719 	WREG32(mmGB_EDC_MODE, tmp);
1720 
1721 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1722 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1723 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1724 
1725 
1726 	/* read back registers to clear the counters */
1727 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1728 		RREG32(sec_ded_counter_registers[i]);
1729 
1730 fail:
1731 	amdgpu_ib_free(adev, &ib, NULL);
1732 	fence_put(f);
1733 
1734 	return r;
1735 }
1736 
1737 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1738 {
1739 	u32 gb_addr_config;
1740 	u32 mc_shared_chmap, mc_arb_ramcfg;
1741 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1742 	u32 tmp;
1743 	int ret;
1744 
1745 	switch (adev->asic_type) {
1746 	case CHIP_TOPAZ:
1747 		adev->gfx.config.max_shader_engines = 1;
1748 		adev->gfx.config.max_tile_pipes = 2;
1749 		adev->gfx.config.max_cu_per_sh = 6;
1750 		adev->gfx.config.max_sh_per_se = 1;
1751 		adev->gfx.config.max_backends_per_se = 2;
1752 		adev->gfx.config.max_texture_channel_caches = 2;
1753 		adev->gfx.config.max_gprs = 256;
1754 		adev->gfx.config.max_gs_threads = 32;
1755 		adev->gfx.config.max_hw_contexts = 8;
1756 
1757 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1758 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1759 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1760 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1761 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1762 		break;
1763 	case CHIP_FIJI:
1764 		adev->gfx.config.max_shader_engines = 4;
1765 		adev->gfx.config.max_tile_pipes = 16;
1766 		adev->gfx.config.max_cu_per_sh = 16;
1767 		adev->gfx.config.max_sh_per_se = 1;
1768 		adev->gfx.config.max_backends_per_se = 4;
1769 		adev->gfx.config.max_texture_channel_caches = 16;
1770 		adev->gfx.config.max_gprs = 256;
1771 		adev->gfx.config.max_gs_threads = 32;
1772 		adev->gfx.config.max_hw_contexts = 8;
1773 
1774 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1775 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1776 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1777 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1778 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1779 		break;
1780 	case CHIP_POLARIS11:
1781 		ret = amdgpu_atombios_get_gfx_info(adev);
1782 		if (ret)
1783 			return ret;
1784 		adev->gfx.config.max_gprs = 256;
1785 		adev->gfx.config.max_gs_threads = 32;
1786 		adev->gfx.config.max_hw_contexts = 8;
1787 
1788 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1789 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1790 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1791 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1792 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1793 		break;
1794 	case CHIP_POLARIS10:
1795 		ret = amdgpu_atombios_get_gfx_info(adev);
1796 		if (ret)
1797 			return ret;
1798 		adev->gfx.config.max_gprs = 256;
1799 		adev->gfx.config.max_gs_threads = 32;
1800 		adev->gfx.config.max_hw_contexts = 8;
1801 
1802 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1803 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1804 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1805 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1806 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1807 		break;
1808 	case CHIP_TONGA:
1809 		adev->gfx.config.max_shader_engines = 4;
1810 		adev->gfx.config.max_tile_pipes = 8;
1811 		adev->gfx.config.max_cu_per_sh = 8;
1812 		adev->gfx.config.max_sh_per_se = 1;
1813 		adev->gfx.config.max_backends_per_se = 2;
1814 		adev->gfx.config.max_texture_channel_caches = 8;
1815 		adev->gfx.config.max_gprs = 256;
1816 		adev->gfx.config.max_gs_threads = 32;
1817 		adev->gfx.config.max_hw_contexts = 8;
1818 
1819 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1820 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1821 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1822 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1823 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1824 		break;
1825 	case CHIP_CARRIZO:
1826 		adev->gfx.config.max_shader_engines = 1;
1827 		adev->gfx.config.max_tile_pipes = 2;
1828 		adev->gfx.config.max_sh_per_se = 1;
1829 		adev->gfx.config.max_backends_per_se = 2;
1830 
1831 		switch (adev->pdev->revision) {
1832 		case 0xc4:
1833 		case 0x84:
1834 		case 0xc8:
1835 		case 0xcc:
1836 		case 0xe1:
1837 		case 0xe3:
1838 			/* B10 */
1839 			adev->gfx.config.max_cu_per_sh = 8;
1840 			break;
1841 		case 0xc5:
1842 		case 0x81:
1843 		case 0x85:
1844 		case 0xc9:
1845 		case 0xcd:
1846 		case 0xe2:
1847 		case 0xe4:
1848 			/* B8 */
1849 			adev->gfx.config.max_cu_per_sh = 6;
1850 			break;
1851 		case 0xc6:
1852 		case 0xca:
1853 		case 0xce:
1854 		case 0x88:
1855 			/* B6 */
1856 			adev->gfx.config.max_cu_per_sh = 6;
1857 			break;
1858 		case 0xc7:
1859 		case 0x87:
1860 		case 0xcb:
1861 		case 0xe5:
1862 		case 0x89:
1863 		default:
1864 			/* B4 */
1865 			adev->gfx.config.max_cu_per_sh = 4;
1866 			break;
1867 		}
1868 
1869 		adev->gfx.config.max_texture_channel_caches = 2;
1870 		adev->gfx.config.max_gprs = 256;
1871 		adev->gfx.config.max_gs_threads = 32;
1872 		adev->gfx.config.max_hw_contexts = 8;
1873 
1874 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1875 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1876 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1877 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1878 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1879 		break;
1880 	case CHIP_STONEY:
1881 		adev->gfx.config.max_shader_engines = 1;
1882 		adev->gfx.config.max_tile_pipes = 2;
1883 		adev->gfx.config.max_sh_per_se = 1;
1884 		adev->gfx.config.max_backends_per_se = 1;
1885 
1886 		switch (adev->pdev->revision) {
1887 		case 0xc0:
1888 		case 0xc1:
1889 		case 0xc2:
1890 		case 0xc4:
1891 		case 0xc8:
1892 		case 0xc9:
1893 			adev->gfx.config.max_cu_per_sh = 3;
1894 			break;
1895 		case 0xd0:
1896 		case 0xd1:
1897 		case 0xd2:
1898 		default:
1899 			adev->gfx.config.max_cu_per_sh = 2;
1900 			break;
1901 		}
1902 
1903 		adev->gfx.config.max_texture_channel_caches = 2;
1904 		adev->gfx.config.max_gprs = 256;
1905 		adev->gfx.config.max_gs_threads = 16;
1906 		adev->gfx.config.max_hw_contexts = 8;
1907 
1908 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1909 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1910 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1911 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1912 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1913 		break;
1914 	default:
1915 		adev->gfx.config.max_shader_engines = 2;
1916 		adev->gfx.config.max_tile_pipes = 4;
1917 		adev->gfx.config.max_cu_per_sh = 2;
1918 		adev->gfx.config.max_sh_per_se = 1;
1919 		adev->gfx.config.max_backends_per_se = 2;
1920 		adev->gfx.config.max_texture_channel_caches = 4;
1921 		adev->gfx.config.max_gprs = 256;
1922 		adev->gfx.config.max_gs_threads = 32;
1923 		adev->gfx.config.max_hw_contexts = 8;
1924 
1925 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1926 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1927 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1928 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1929 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1930 		break;
1931 	}
1932 
1933 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1934 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1935 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1936 
1937 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1938 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1939 	if (adev->flags & AMD_IS_APU) {
1940 		/* Get memory bank mapping mode. */
1941 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1942 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1943 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1944 
1945 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1946 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1947 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1948 
1949 		/* Validate settings in case only one DIMM installed. */
1950 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1951 			dimm00_addr_map = 0;
1952 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1953 			dimm01_addr_map = 0;
1954 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1955 			dimm10_addr_map = 0;
1956 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1957 			dimm11_addr_map = 0;
1958 
1959 		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1960 		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1961 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1962 			adev->gfx.config.mem_row_size_in_kb = 2;
1963 		else
1964 			adev->gfx.config.mem_row_size_in_kb = 1;
1965 	} else {
1966 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1967 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1968 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1969 			adev->gfx.config.mem_row_size_in_kb = 4;
1970 	}
1971 
1972 	adev->gfx.config.shader_engine_tile_size = 32;
1973 	adev->gfx.config.num_gpus = 1;
1974 	adev->gfx.config.multi_gpu_tile_size = 64;
1975 
1976 	/* fix up row size */
1977 	switch (adev->gfx.config.mem_row_size_in_kb) {
1978 	case 1:
1979 	default:
1980 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1981 		break;
1982 	case 2:
1983 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1984 		break;
1985 	case 4:
1986 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1987 		break;
1988 	}
1989 	adev->gfx.config.gb_addr_config = gb_addr_config;
1990 
1991 	return 0;
1992 }
1993 
/*
 * gfx_v8_0_sw_init - software side init for the gfx block
 *
 * Registers the CP interrupt sources, loads the gfx microcode, allocates
 * the RLC and MEC buffer objects, initializes the gfx and compute rings,
 * reserves the GDS/GWS/OA partitions and finally runs the asic specific
 * early gfx configuration.  Returns 0 on success or a negative error code;
 * the caller is expected to tear down via the matching sw_fini.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		/* each compute ring gets its own doorbell, starting at MEC ring 0 */
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* map ring index linearly to (pipe, queue): 8 queues per pipe */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		/* EOP interrupt source is selected per pipe */
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* constant engine ram size: 0x8000 bytes (32KB) */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2110 
2111 static int gfx_v8_0_sw_fini(void *handle)
2112 {
2113 	int i;
2114 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2115 
2116 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2117 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2118 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2119 
2120 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2121 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2122 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2123 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2124 
2125 	gfx_v8_0_mec_fini(adev);
2126 	gfx_v8_0_rlc_fini(adev);
2127 	gfx_v8_0_free_microcode(adev);
2128 
2129 	return 0;
2130 }
2131 
2132 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2133 {
2134 	uint32_t *modearray, *mod2array;
2135 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2136 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2137 	u32 reg_offset;
2138 
2139 	modearray = adev->gfx.config.tile_mode_array;
2140 	mod2array = adev->gfx.config.macrotile_mode_array;
2141 
2142 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2143 		modearray[reg_offset] = 0;
2144 
2145 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2146 		mod2array[reg_offset] = 0;
2147 
2148 	switch (adev->asic_type) {
2149 	case CHIP_TOPAZ:
2150 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151 				PIPE_CONFIG(ADDR_SURF_P2) |
2152 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2153 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2155 				PIPE_CONFIG(ADDR_SURF_P2) |
2156 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2157 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2159 				PIPE_CONFIG(ADDR_SURF_P2) |
2160 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2161 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2163 				PIPE_CONFIG(ADDR_SURF_P2) |
2164 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2165 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2166 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2167 				PIPE_CONFIG(ADDR_SURF_P2) |
2168 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2169 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2170 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2171 				PIPE_CONFIG(ADDR_SURF_P2) |
2172 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2173 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2174 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175 				PIPE_CONFIG(ADDR_SURF_P2) |
2176 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2177 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2178 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2179 				PIPE_CONFIG(ADDR_SURF_P2));
2180 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2181 				PIPE_CONFIG(ADDR_SURF_P2) |
2182 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2183 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2185 				 PIPE_CONFIG(ADDR_SURF_P2) |
2186 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2187 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189 				 PIPE_CONFIG(ADDR_SURF_P2) |
2190 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2191 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2192 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2193 				 PIPE_CONFIG(ADDR_SURF_P2) |
2194 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2197 				 PIPE_CONFIG(ADDR_SURF_P2) |
2198 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2201 				 PIPE_CONFIG(ADDR_SURF_P2) |
2202 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2203 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2205 				 PIPE_CONFIG(ADDR_SURF_P2) |
2206 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2207 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2208 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2209 				 PIPE_CONFIG(ADDR_SURF_P2) |
2210 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2211 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2213 				 PIPE_CONFIG(ADDR_SURF_P2) |
2214 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2215 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2217 				 PIPE_CONFIG(ADDR_SURF_P2) |
2218 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2221 				 PIPE_CONFIG(ADDR_SURF_P2) |
2222 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2223 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2225 				 PIPE_CONFIG(ADDR_SURF_P2) |
2226 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2227 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2228 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2229 				 PIPE_CONFIG(ADDR_SURF_P2) |
2230 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2231 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2232 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2233 				 PIPE_CONFIG(ADDR_SURF_P2) |
2234 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2235 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2236 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2237 				 PIPE_CONFIG(ADDR_SURF_P2) |
2238 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2239 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2240 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2241 				 PIPE_CONFIG(ADDR_SURF_P2) |
2242 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2243 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245 				 PIPE_CONFIG(ADDR_SURF_P2) |
2246 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2247 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2248 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2249 				 PIPE_CONFIG(ADDR_SURF_P2) |
2250 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2251 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2252 
2253 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256 				NUM_BANKS(ADDR_SURF_8_BANK));
2257 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2258 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260 				NUM_BANKS(ADDR_SURF_8_BANK));
2261 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2263 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264 				NUM_BANKS(ADDR_SURF_8_BANK));
2265 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2267 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268 				NUM_BANKS(ADDR_SURF_8_BANK));
2269 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2271 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2272 				NUM_BANKS(ADDR_SURF_8_BANK));
2273 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276 				NUM_BANKS(ADDR_SURF_8_BANK));
2277 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2279 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2280 				NUM_BANKS(ADDR_SURF_8_BANK));
2281 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2282 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2283 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284 				NUM_BANKS(ADDR_SURF_16_BANK));
2285 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2286 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288 				NUM_BANKS(ADDR_SURF_16_BANK));
2289 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2290 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2291 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2292 				 NUM_BANKS(ADDR_SURF_16_BANK));
2293 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2294 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2295 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2296 				 NUM_BANKS(ADDR_SURF_16_BANK));
2297 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2299 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2300 				 NUM_BANKS(ADDR_SURF_16_BANK));
2301 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2303 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2304 				 NUM_BANKS(ADDR_SURF_16_BANK));
2305 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2306 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2307 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2308 				 NUM_BANKS(ADDR_SURF_8_BANK));
2309 
2310 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2311 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2312 			    reg_offset != 23)
2313 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2314 
2315 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2316 			if (reg_offset != 7)
2317 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2318 
2319 		break;
2320 	case CHIP_FIJI:
2321 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2324 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2325 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2326 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2328 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2329 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2332 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2333 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2336 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2337 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2338 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2340 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2341 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2344 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2345 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2346 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2348 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2349 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2351 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2352 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2353 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2354 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2355 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2356 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2358 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2362 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2364 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2366 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2367 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2369 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2370 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2372 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2374 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2376 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2378 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2379 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2380 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2382 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2384 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2386 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2387 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2388 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2389 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2390 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2391 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2392 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2394 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2395 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2396 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2398 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2399 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2400 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2402 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2403 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2404 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2406 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2407 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2408 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2410 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2411 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2412 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2413 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2414 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2415 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2416 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2419 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2420 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2422 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2423 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2424 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2426 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2427 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2443 
2444 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447 				NUM_BANKS(ADDR_SURF_8_BANK));
2448 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451 				NUM_BANKS(ADDR_SURF_8_BANK));
2452 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2454 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455 				NUM_BANKS(ADDR_SURF_8_BANK));
2456 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2458 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459 				NUM_BANKS(ADDR_SURF_8_BANK));
2460 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2462 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463 				NUM_BANKS(ADDR_SURF_8_BANK));
2464 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467 				NUM_BANKS(ADDR_SURF_8_BANK));
2468 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471 				NUM_BANKS(ADDR_SURF_8_BANK));
2472 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2474 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475 				NUM_BANKS(ADDR_SURF_8_BANK));
2476 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479 				NUM_BANKS(ADDR_SURF_8_BANK));
2480 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2482 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 				 NUM_BANKS(ADDR_SURF_8_BANK));
2484 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 				 NUM_BANKS(ADDR_SURF_8_BANK));
2488 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2490 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491 				 NUM_BANKS(ADDR_SURF_8_BANK));
2492 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495 				 NUM_BANKS(ADDR_SURF_8_BANK));
2496 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 				 NUM_BANKS(ADDR_SURF_4_BANK));
2500 
2501 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2502 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2503 
2504 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505 			if (reg_offset != 7)
2506 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2507 
2508 		break;
	case CHIP_TONGA:
		/*
		 * GB_TILE_MODE0..30 values for this chip.  Every entry uses
		 * PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) except the alternate
		 * PRT entries 7, 12, 17, 23 and 30, which use
		 * ADDR_SURF_P4_16x16.  Entries 0-7 are depth micro-tiled
		 * modes with increasing TILE_SPLIT sizes, entry 8 is the
		 * linear-aligned mode, 9-12 are display micro-tiled, 13-26
		 * thin/thick micro-tiled, and 27-30 rotated micro-tiled.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 values: bank width/height, macro
		 * tile aspect ratio and bank count per macrotile mode.
		 * mod2array[7] is deliberately never assigned; the write
		 * loop below skips register index 7 to match.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile mode registers from the table above. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile mode registers, skipping index 7
		 * (left unset above; same skip as the other chip cases). */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
		/*
		 * GB_TILE_MODE0..30 values for this chip.  Every entry uses
		 * PIPE_CONFIG(ADDR_SURF_P4_16x16), so the "alternate" PRT
		 * entries (7, 12, 17, 23, 30) are identical to their primary
		 * counterparts (6, 11, 16, 22, 29).  Entries 0-7 are depth
		 * micro-tiled modes with increasing TILE_SPLIT sizes, entry 8
		 * is the linear-aligned mode, 9-12 display, 13-26 thin/thick
		 * and 27-30 rotated micro-tiled modes.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 values: bank width/height, macro
		 * tile aspect ratio and bank count per macrotile mode.
		 * Entries 8 and 9 use BANK_WIDTH_2 (unlike the other
		 * entries).  mod2array[7] is deliberately never assigned;
		 * the write loop below skips register index 7 to match.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile mode registers from the table above. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile mode registers, skipping index 7
		 * (left unset above; same skip as the other chip cases). */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2900 	case CHIP_POLARIS10:
2901 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2904 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2905 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2908 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2912 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2913 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2916 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2920 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2921 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2924 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2926 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2928 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2929 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2931 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2932 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2933 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2934 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2935 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2936 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2940 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2944 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2946 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2947 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2949 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2951 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2952 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2954 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2956 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2960 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2964 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2967 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2968 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2969 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2971 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2972 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2975 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2976 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2978 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2980 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2982 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2984 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2986 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2987 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2988 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2990 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2991 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2992 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2993 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2994 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2995 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2996 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2997 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2998 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2999 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3000 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3002 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3004 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3008 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3010 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3014 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3016 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3018 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3019 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3021 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3022 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3023 
3024 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 				NUM_BANKS(ADDR_SURF_16_BANK));
3028 
3029 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032 				NUM_BANKS(ADDR_SURF_16_BANK));
3033 
3034 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037 				NUM_BANKS(ADDR_SURF_16_BANK));
3038 
3039 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3041 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3042 				NUM_BANKS(ADDR_SURF_16_BANK));
3043 
3044 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3046 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047 				NUM_BANKS(ADDR_SURF_16_BANK));
3048 
3049 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052 				NUM_BANKS(ADDR_SURF_16_BANK));
3053 
3054 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057 				NUM_BANKS(ADDR_SURF_16_BANK));
3058 
3059 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3061 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3062 				NUM_BANKS(ADDR_SURF_16_BANK));
3063 
3064 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3066 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067 				NUM_BANKS(ADDR_SURF_16_BANK));
3068 
3069 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3071 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072 				NUM_BANKS(ADDR_SURF_16_BANK));
3073 
3074 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077 				NUM_BANKS(ADDR_SURF_16_BANK));
3078 
3079 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3082 				NUM_BANKS(ADDR_SURF_8_BANK));
3083 
3084 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087 				NUM_BANKS(ADDR_SURF_4_BANK));
3088 
3089 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3092 				NUM_BANKS(ADDR_SURF_4_BANK));
3093 
3094 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3095 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3096 
3097 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3098 			if (reg_offset != 7)
3099 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3100 
3101 		break;
3102 	case CHIP_STONEY:
3103 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104 				PIPE_CONFIG(ADDR_SURF_P2) |
3105 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3106 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3107 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108 				PIPE_CONFIG(ADDR_SURF_P2) |
3109 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3110 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3111 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112 				PIPE_CONFIG(ADDR_SURF_P2) |
3113 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3114 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3115 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116 				PIPE_CONFIG(ADDR_SURF_P2) |
3117 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3118 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3119 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120 				PIPE_CONFIG(ADDR_SURF_P2) |
3121 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3122 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3123 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3124 				PIPE_CONFIG(ADDR_SURF_P2) |
3125 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3126 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3127 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128 				PIPE_CONFIG(ADDR_SURF_P2) |
3129 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3130 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3131 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3132 				PIPE_CONFIG(ADDR_SURF_P2));
3133 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134 				PIPE_CONFIG(ADDR_SURF_P2) |
3135 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3136 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3138 				 PIPE_CONFIG(ADDR_SURF_P2) |
3139 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3140 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3142 				 PIPE_CONFIG(ADDR_SURF_P2) |
3143 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3144 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3145 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3146 				 PIPE_CONFIG(ADDR_SURF_P2) |
3147 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3148 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3149 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3150 				 PIPE_CONFIG(ADDR_SURF_P2) |
3151 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3152 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3153 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3154 				 PIPE_CONFIG(ADDR_SURF_P2) |
3155 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3156 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3157 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3158 				 PIPE_CONFIG(ADDR_SURF_P2) |
3159 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3160 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3161 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3162 				 PIPE_CONFIG(ADDR_SURF_P2) |
3163 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3164 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3165 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3166 				 PIPE_CONFIG(ADDR_SURF_P2) |
3167 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3168 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3170 				 PIPE_CONFIG(ADDR_SURF_P2) |
3171 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3172 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3173 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3174 				 PIPE_CONFIG(ADDR_SURF_P2) |
3175 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3176 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3177 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3178 				 PIPE_CONFIG(ADDR_SURF_P2) |
3179 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3180 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3181 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3182 				 PIPE_CONFIG(ADDR_SURF_P2) |
3183 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3185 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3186 				 PIPE_CONFIG(ADDR_SURF_P2) |
3187 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3188 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3189 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3190 				 PIPE_CONFIG(ADDR_SURF_P2) |
3191 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3192 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3194 				 PIPE_CONFIG(ADDR_SURF_P2) |
3195 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3196 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3197 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198 				 PIPE_CONFIG(ADDR_SURF_P2) |
3199 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3200 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3201 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202 				 PIPE_CONFIG(ADDR_SURF_P2) |
3203 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3204 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3205 
3206 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3207 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3208 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3209 				NUM_BANKS(ADDR_SURF_8_BANK));
3210 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3211 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3212 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213 				NUM_BANKS(ADDR_SURF_8_BANK));
3214 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3215 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3216 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3217 				NUM_BANKS(ADDR_SURF_8_BANK));
3218 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3219 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3220 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3221 				NUM_BANKS(ADDR_SURF_8_BANK));
3222 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3223 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3224 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3225 				NUM_BANKS(ADDR_SURF_8_BANK));
3226 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3228 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3229 				NUM_BANKS(ADDR_SURF_8_BANK));
3230 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3233 				NUM_BANKS(ADDR_SURF_8_BANK));
3234 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3235 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3236 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3237 				NUM_BANKS(ADDR_SURF_16_BANK));
3238 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3239 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3240 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 				NUM_BANKS(ADDR_SURF_16_BANK));
3242 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3243 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3244 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3245 				 NUM_BANKS(ADDR_SURF_16_BANK));
3246 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3248 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249 				 NUM_BANKS(ADDR_SURF_16_BANK));
3250 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253 				 NUM_BANKS(ADDR_SURF_16_BANK));
3254 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3256 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3257 				 NUM_BANKS(ADDR_SURF_16_BANK));
3258 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3261 				 NUM_BANKS(ADDR_SURF_8_BANK));
3262 
3263 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3264 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3265 			    reg_offset != 23)
3266 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3267 
3268 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3269 			if (reg_offset != 7)
3270 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3271 
3272 		break;
3273 	default:
3274 		dev_warn(adev->dev,
3275 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3276 			 adev->asic_type);
3277 
3278 	case CHIP_CARRIZO:
3279 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280 				PIPE_CONFIG(ADDR_SURF_P2) |
3281 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3282 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3283 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284 				PIPE_CONFIG(ADDR_SURF_P2) |
3285 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3286 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288 				PIPE_CONFIG(ADDR_SURF_P2) |
3289 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3290 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292 				PIPE_CONFIG(ADDR_SURF_P2) |
3293 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3294 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296 				PIPE_CONFIG(ADDR_SURF_P2) |
3297 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3298 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3300 				PIPE_CONFIG(ADDR_SURF_P2) |
3301 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304 				PIPE_CONFIG(ADDR_SURF_P2) |
3305 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3306 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3307 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3308 				PIPE_CONFIG(ADDR_SURF_P2));
3309 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3310 				PIPE_CONFIG(ADDR_SURF_P2) |
3311 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3312 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3313 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3314 				 PIPE_CONFIG(ADDR_SURF_P2) |
3315 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3317 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3318 				 PIPE_CONFIG(ADDR_SURF_P2) |
3319 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3320 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3321 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3322 				 PIPE_CONFIG(ADDR_SURF_P2) |
3323 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3324 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3325 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3326 				 PIPE_CONFIG(ADDR_SURF_P2) |
3327 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3330 				 PIPE_CONFIG(ADDR_SURF_P2) |
3331 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3333 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3334 				 PIPE_CONFIG(ADDR_SURF_P2) |
3335 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3337 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3338 				 PIPE_CONFIG(ADDR_SURF_P2) |
3339 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3340 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3342 				 PIPE_CONFIG(ADDR_SURF_P2) |
3343 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3344 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3346 				 PIPE_CONFIG(ADDR_SURF_P2) |
3347 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3350 				 PIPE_CONFIG(ADDR_SURF_P2) |
3351 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3354 				 PIPE_CONFIG(ADDR_SURF_P2) |
3355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3358 				 PIPE_CONFIG(ADDR_SURF_P2) |
3359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3362 				 PIPE_CONFIG(ADDR_SURF_P2) |
3363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3366 				 PIPE_CONFIG(ADDR_SURF_P2) |
3367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3370 				 PIPE_CONFIG(ADDR_SURF_P2) |
3371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3373 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3374 				 PIPE_CONFIG(ADDR_SURF_P2) |
3375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3377 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3378 				 PIPE_CONFIG(ADDR_SURF_P2) |
3379 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3380 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3381 
3382 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3384 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3385 				NUM_BANKS(ADDR_SURF_8_BANK));
3386 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3388 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3389 				NUM_BANKS(ADDR_SURF_8_BANK));
3390 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3392 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3393 				NUM_BANKS(ADDR_SURF_8_BANK));
3394 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397 				NUM_BANKS(ADDR_SURF_8_BANK));
3398 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401 				NUM_BANKS(ADDR_SURF_8_BANK));
3402 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405 				NUM_BANKS(ADDR_SURF_8_BANK));
3406 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3407 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3408 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3409 				NUM_BANKS(ADDR_SURF_8_BANK));
3410 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3411 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3412 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413 				NUM_BANKS(ADDR_SURF_16_BANK));
3414 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3415 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3416 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417 				NUM_BANKS(ADDR_SURF_16_BANK));
3418 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3419 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3420 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421 				 NUM_BANKS(ADDR_SURF_16_BANK));
3422 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3423 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3424 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425 				 NUM_BANKS(ADDR_SURF_16_BANK));
3426 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3428 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429 				 NUM_BANKS(ADDR_SURF_16_BANK));
3430 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3433 				 NUM_BANKS(ADDR_SURF_16_BANK));
3434 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3437 				 NUM_BANKS(ADDR_SURF_8_BANK));
3438 
3439 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3440 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3441 			    reg_offset != 23)
3442 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3443 
3444 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3445 			if (reg_offset != 7)
3446 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3447 
3448 		break;
3449 	}
3450 }
3451 
3452 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3453 				  u32 se_num, u32 sh_num, u32 instance)
3454 {
3455 	u32 data;
3456 
3457 	if (instance == 0xffffffff)
3458 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3459 	else
3460 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3461 
3462 	if (se_num == 0xffffffff)
3463 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3464 	else
3465 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3466 
3467 	if (sh_num == 0xffffffff)
3468 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3469 	else
3470 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3471 
3472 	WREG32(mmGRBM_GFX_INDEX, data);
3473 }
3474 
3475 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3476 {
3477 	return (u32)((1ULL << bit_width) - 1);
3478 }
3479 
3480 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3481 {
3482 	u32 data, mask;
3483 
3484 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3485 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3486 
3487 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3488 
3489 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3490 				       adev->gfx.config.max_sh_per_se);
3491 
3492 	return (~data) & mask;
3493 }
3494 
3495 static void
3496 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3497 {
3498 	switch (adev->asic_type) {
3499 	case CHIP_FIJI:
3500 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3501 			  RB_XSEL2(1) | PKR_MAP(2) |
3502 			  PKR_XSEL(1) | PKR_YSEL(1) |
3503 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3504 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3505 			   SE_PAIR_YSEL(2);
3506 		break;
3507 	case CHIP_TONGA:
3508 	case CHIP_POLARIS10:
3509 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3510 			  SE_XSEL(1) | SE_YSEL(1);
3511 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3512 			   SE_PAIR_YSEL(2);
3513 		break;
3514 	case CHIP_TOPAZ:
3515 	case CHIP_CARRIZO:
3516 		*rconf |= RB_MAP_PKR0(2);
3517 		*rconf1 |= 0x0;
3518 		break;
3519 	case CHIP_POLARIS11:
3520 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521 			  SE_XSEL(1) | SE_YSEL(1);
3522 		*rconf1 |= 0x0;
3523 		break;
3524 	case CHIP_STONEY:
3525 		*rconf |= 0x0;
3526 		*rconf1 |= 0x0;
3527 		break;
3528 	default:
3529 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3530 		break;
3531 	}
3532 }
3533 
/*
 * gfx_v8_0_write_harvested_raster_configs - per-SE raster config programming
 * for harvested parts
 *
 * When some render backends (RBs) are disabled by harvesting, the default
 * raster_config mapping would route work to dead RBs.  This walks every
 * shader engine, patches the SE/PKR/RB map fields of raster_config so only
 * live RBs are referenced, and writes the result with GRBM_GFX_INDEX
 * pointed at that SE.  rb_mask is the global bitmap of enabled RBs and
 * num_rb the total RB count across all SEs.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the global RB bitmap into one contiguous sub-mask per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* Sanity-check the topologies this remapping logic supports. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one whole SE pair is empty, retarget SE_PAIR_MAP at the
	 * pair that still has live RBs.
	 */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of this pair is fully harvested, point SE_MAP
		 * at the surviving one.
		 */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: pick the packer that still has
		 * live RBs.
		 */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally remap individual RBs within each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3642 
3643 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3644 {
3645 	int i, j;
3646 	u32 data;
3647 	u32 raster_config = 0, raster_config_1 = 0;
3648 	u32 active_rbs = 0;
3649 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3650 					adev->gfx.config.max_sh_per_se;
3651 	unsigned num_rb_pipes;
3652 
3653 	mutex_lock(&adev->grbm_idx_mutex);
3654 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3655 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3656 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3657 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3658 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3659 					       rb_bitmap_width_per_sh);
3660 		}
3661 	}
3662 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3663 
3664 	adev->gfx.config.backend_enable_mask = active_rbs;
3665 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3666 
3667 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3668 			     adev->gfx.config.max_shader_engines, 16);
3669 
3670 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3671 
3672 	if (!adev->gfx.config.backend_enable_mask ||
3673 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3674 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3675 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3676 	} else {
3677 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3678 							adev->gfx.config.backend_enable_mask,
3679 							num_rb_pipes);
3680 	}
3681 
3682 	mutex_unlock(&adev->grbm_idx_mutex);
3683 }
3684 
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs' SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cacheable
	 * default MTYPE, private apertures go through the ATC.
	 */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* Apply the same config to every compute VMID (8..15). */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3729 
3730 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3731 {
3732 	u32 tmp;
3733 	int i;
3734 
3735 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3736 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3737 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3738 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3739 
3740 	gfx_v8_0_tiling_mode_table_init(adev);
3741 	gfx_v8_0_setup_rb(adev);
3742 	gfx_v8_0_get_cu_info(adev);
3743 
3744 	/* XXX SH_MEM regs */
3745 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3746 	mutex_lock(&adev->srbm_mutex);
3747 	for (i = 0; i < 16; i++) {
3748 		vi_srbm_select(adev, 0, 0, 0, i);
3749 		/* CP and shaders */
3750 		if (i == 0) {
3751 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3752 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3753 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3754 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3755 			WREG32(mmSH_MEM_CONFIG, tmp);
3756 		} else {
3757 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3758 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3759 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3760 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3761 			WREG32(mmSH_MEM_CONFIG, tmp);
3762 		}
3763 
3764 		WREG32(mmSH_MEM_APE1_BASE, 1);
3765 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3766 		WREG32(mmSH_MEM_BASES, 0);
3767 	}
3768 	vi_srbm_select(adev, 0, 0, 0, 0);
3769 	mutex_unlock(&adev->srbm_mutex);
3770 
3771 	gfx_v8_0_init_compute_vmid(adev);
3772 
3773 	mutex_lock(&adev->grbm_idx_mutex);
3774 	/*
3775 	 * making sure that the following register writes will be broadcasted
3776 	 * to all the shaders
3777 	 */
3778 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3779 
3780 	WREG32(mmPA_SC_FIFO_SIZE,
3781 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3782 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3783 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3784 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3785 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3786 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3787 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3788 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3789 	mutex_unlock(&adev->grbm_idx_mutex);
3790 
3791 }
3792 
/*
 * Busy-wait until the RLC serdes masters report idle.
 * First polls RLC_SERDES_CU_MASTER_BUSY per SE/SH combination, then the
 * non-CU masters (SE/GC/TC0/TC1), each bounded by adev->usec_timeout.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			/* target one SE/SH instance at a time */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode before releasing the index mutex */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3822 
3823 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3824 					       bool enable)
3825 {
3826 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3827 
3828 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3829 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3830 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3831 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3832 
3833 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3834 }
3835 
3836 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3837 {
3838 	/* csib */
3839 	WREG32(mmRLC_CSIB_ADDR_HI,
3840 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3841 	WREG32(mmRLC_CSIB_ADDR_LO,
3842 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3843 	WREG32(mmRLC_CSIB_LENGTH,
3844 			adev->gfx.rlc.clear_state_size);
3845 }
3846 
/*
 * Walk the RLC indirect register-list-format blob and, in place, replace
 * each entry's index-register offset with its position in a de-duplicated
 * table.
 *
 * Outputs:
 *  - ind_start_offsets[] / *offset_count: start offset of each list entry
 *    (entries are delimited by a 0xFFFFFFFF sentinel dword);
 *  - unique_indices[] / *indices_count: de-duplicated index register
 *    offsets collected from the entries.
 *
 * NOTE(review): each non-sentinel record appears to span three dwords with
 * the index register in the third (the "+= 2" skip below) — confirm
 * against the RLC list-format specification.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* record where this entry begins */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* sentinel: next dword starts a new entry */
			new_entry = true;
			continue;
		}

		/* skip ahead to the entry's index register dword */
		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append to the unique table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* rewrite the blob to reference the unique-table slot */
		register_list_format[ind_offset] = indices;
	}
}
3896 
3897 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3898 {
3899 	int i, temp, data;
3900 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3901 	int indices_count = 0;
3902 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3903 	int offset_count = 0;
3904 
3905 	int list_size;
3906 	unsigned int *register_list_format =
3907 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3908 	if (register_list_format == NULL)
3909 		return -ENOMEM;
3910 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3911 			adev->gfx.rlc.reg_list_format_size_bytes);
3912 
3913 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3914 				RLC_FormatDirectRegListLength,
3915 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3916 				unique_indices,
3917 				&indices_count,
3918 				sizeof(unique_indices) / sizeof(int),
3919 				indirect_start_offsets,
3920 				&offset_count,
3921 				sizeof(indirect_start_offsets)/sizeof(int));
3922 
3923 	/* save and restore list */
3924 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3925 
3926 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3927 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3928 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3929 
3930 	/* indirect list */
3931 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3932 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3933 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3934 
3935 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3936 	list_size = list_size >> 1;
3937 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3938 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3939 
3940 	/* starting offsets starts */
3941 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3942 		adev->gfx.rlc.starting_offsets_start);
3943 	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3944 		WREG32(mmRLC_GPM_SCRATCH_DATA,
3945 				indirect_start_offsets[i]);
3946 
3947 	/* unique indices */
3948 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3949 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3950 	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3951 		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3952 		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3953 	}
3954 	kfree(register_list_format);
3955 
3956 	return 0;
3957 }
3958 
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3963 
/*
 * Program RLC power-gating delay and idle-threshold registers.
 * Only takes effect when static, smart or dynamic GFX power gating is
 * advertised in adev->pg_flags.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		/* all four PG delays use the same 0x10 tuning value */
		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		/* GFX idle time before GRBM registers are auto-saved */
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
	}
}
3983 
/* Enable/disable SMU clock slow-down while powering up (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3989 
/* Enable/disable SMU clock slow-down while powering down (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3995 
/* Note the inverted sense: CP_PG_DISABLE=1 means CP power gating OFF,
 * so enable==true clears the disable bit. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
}
4000 
4001 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4002 {
4003 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4004 			      AMD_PG_SUPPORT_GFX_SMG |
4005 			      AMD_PG_SUPPORT_GFX_DMG |
4006 			      AMD_PG_SUPPORT_CP |
4007 			      AMD_PG_SUPPORT_GDS |
4008 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
4009 		gfx_v8_0_init_csb(adev);
4010 		gfx_v8_0_init_save_restore_list(adev);
4011 		gfx_v8_0_enable_save_restore_machine(adev);
4012 
4013 		if ((adev->asic_type == CHIP_CARRIZO) ||
4014 		    (adev->asic_type == CHIP_STONEY)) {
4015 			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4016 			gfx_v8_0_init_power_gating(adev);
4017 			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4018 			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4019 				cz_enable_sck_slow_down_on_power_up(adev, true);
4020 				cz_enable_sck_slow_down_on_power_down(adev, true);
4021 			} else {
4022 				cz_enable_sck_slow_down_on_power_up(adev, false);
4023 				cz_enable_sck_slow_down_on_power_down(adev, false);
4024 			}
4025 			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4026 				cz_enable_cp_power_gating(adev, true);
4027 			else
4028 				cz_enable_cp_power_gating(adev, false);
4029 		} else if (adev->asic_type == CHIP_POLARIS11) {
4030 			gfx_v8_0_init_power_gating(adev);
4031 		}
4032 	}
4033 }
4034 
/* Halt the RLC F32 core, mask the GUI idle interrupts and wait for the
 * serdes masters to drain. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4042 
/* Pulse the RLC soft reset: assert, settle 50us, deassert, settle 50us. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4051 
/* Start the RLC F32 core; dGPUs re-enable the GUI idle interrupt here,
 * APUs defer it until the CP is initialized. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4062 
4063 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4064 {
4065 	const struct rlc_firmware_header_v2_0 *hdr;
4066 	const __le32 *fw_data;
4067 	unsigned i, fw_size;
4068 
4069 	if (!adev->gfx.rlc_fw)
4070 		return -EINVAL;
4071 
4072 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4073 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4074 
4075 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4076 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4077 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4078 
4079 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4080 	for (i = 0; i < fw_size; i++)
4081 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4082 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4083 
4084 	return 0;
4085 }
4086 
/*
 * Full RLC bring-up: stop it, disable CG/PG, soft-reset, program PG,
 * load microcode (legacy path or via SMU), then start it again.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10)
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just wait for completion */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4123 
4124 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4125 {
4126 	int i;
4127 	u32 tmp = RREG32(mmCP_ME_CNTL);
4128 
4129 	if (enable) {
4130 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4131 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4132 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4133 	} else {
4134 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4135 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4136 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4137 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4138 			adev->gfx.gfx_ring[i].ready = false;
4139 	}
4140 	WREG32(mmCP_ME_CNTL, tmp);
4141 	udelay(50);
4142 }
4143 
4144 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4145 {
4146 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4147 	const struct gfx_firmware_header_v1_0 *ce_hdr;
4148 	const struct gfx_firmware_header_v1_0 *me_hdr;
4149 	const __le32 *fw_data;
4150 	unsigned i, fw_size;
4151 
4152 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4153 		return -EINVAL;
4154 
4155 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4156 		adev->gfx.pfp_fw->data;
4157 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4158 		adev->gfx.ce_fw->data;
4159 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4160 		adev->gfx.me_fw->data;
4161 
4162 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4163 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4164 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4165 
4166 	gfx_v8_0_cp_gfx_enable(adev, false);
4167 
4168 	/* PFP */
4169 	fw_data = (const __le32 *)
4170 		(adev->gfx.pfp_fw->data +
4171 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4172 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4173 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4174 	for (i = 0; i < fw_size; i++)
4175 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4176 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4177 
4178 	/* CE */
4179 	fw_data = (const __le32 *)
4180 		(adev->gfx.ce_fw->data +
4181 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4182 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4183 	WREG32(mmCP_CE_UCODE_ADDR, 0);
4184 	for (i = 0; i < fw_size; i++)
4185 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4186 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4187 
4188 	/* ME */
4189 	fw_data = (const __le32 *)
4190 		(adev->gfx.me_fw->data +
4191 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4192 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4193 	WREG32(mmCP_ME_RAM_WADDR, 0);
4194 	for (i = 0; i < fw_size; i++)
4195 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4196 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4197 
4198 	return 0;
4199 }
4200 
4201 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4202 {
4203 	u32 count = 0;
4204 	const struct cs_section_def *sect = NULL;
4205 	const struct cs_extent_def *ext = NULL;
4206 
4207 	/* begin clear state */
4208 	count += 2;
4209 	/* context control state */
4210 	count += 3;
4211 
4212 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4213 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4214 			if (sect->id == SECT_CONTEXT)
4215 				count += 2 + ext->reg_count;
4216 			else
4217 				return 0;
4218 		}
4219 	}
4220 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4221 	count += 4;
4222 	/* end clear state */
4223 	count += 2;
4224 	/* clear state */
4225 	count += 2;
4226 
4227 	return count;
4228 }
4229 
/*
 * Initialize the gfx CP and emit the clear-state sequence on ring 0:
 * context control, the vi_cs_data context registers, ASIC-specific
 * raster config, and the CE partition bases. The dword budget must match
 * gfx_v8_0_get_csb_size() + 4.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the trailing SET_BASE packet emitted below */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* ASIC-specific PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* raster config depends on the single/dual RB layout */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4321 
/*
 * Program gfx ring 0 into the CP (buffer size, rptr writeback address,
 * GPU base address, doorbell setup) and start it, then run a ring test.
 * Returns the ring-test result; on failure the ring is left not-ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the pointer writes land, then drop RB_RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address is programmed in 256-byte units */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			/* restrict the doorbell aperture for gfx ring 0 */
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4402 
4403 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4404 {
4405 	int i;
4406 
4407 	if (enable) {
4408 		WREG32(mmCP_MEC_CNTL, 0);
4409 	} else {
4410 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4411 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4412 			adev->gfx.compute_ring[i].ready = false;
4413 	}
4414 	udelay(50);
4415 }
4416 
/*
 * Load the MEC1 (and optional MEC2) compute microcode through the
 * UCODE_ADDR/UCODE_DATA register pairs. The MEC is halted first.
 * Returns -EINVAL if the MEC1 firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4462 
4463 struct vi_mqd {
4464 	uint32_t header;  /* ordinal0 */
4465 	uint32_t compute_dispatch_initiator;  /* ordinal1 */
4466 	uint32_t compute_dim_x;  /* ordinal2 */
4467 	uint32_t compute_dim_y;  /* ordinal3 */
4468 	uint32_t compute_dim_z;  /* ordinal4 */
4469 	uint32_t compute_start_x;  /* ordinal5 */
4470 	uint32_t compute_start_y;  /* ordinal6 */
4471 	uint32_t compute_start_z;  /* ordinal7 */
4472 	uint32_t compute_num_thread_x;  /* ordinal8 */
4473 	uint32_t compute_num_thread_y;  /* ordinal9 */
4474 	uint32_t compute_num_thread_z;  /* ordinal10 */
4475 	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4476 	uint32_t compute_perfcount_enable;  /* ordinal12 */
4477 	uint32_t compute_pgm_lo;  /* ordinal13 */
4478 	uint32_t compute_pgm_hi;  /* ordinal14 */
4479 	uint32_t compute_tba_lo;  /* ordinal15 */
4480 	uint32_t compute_tba_hi;  /* ordinal16 */
4481 	uint32_t compute_tma_lo;  /* ordinal17 */
4482 	uint32_t compute_tma_hi;  /* ordinal18 */
4483 	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4484 	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4485 	uint32_t compute_vmid;  /* ordinal21 */
4486 	uint32_t compute_resource_limits;  /* ordinal22 */
4487 	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4488 	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4489 	uint32_t compute_tmpring_size;  /* ordinal25 */
4490 	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4491 	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4492 	uint32_t compute_restart_x;  /* ordinal28 */
4493 	uint32_t compute_restart_y;  /* ordinal29 */
4494 	uint32_t compute_restart_z;  /* ordinal30 */
4495 	uint32_t compute_thread_trace_enable;  /* ordinal31 */
4496 	uint32_t compute_misc_reserved;  /* ordinal32 */
4497 	uint32_t compute_dispatch_id;  /* ordinal33 */
4498 	uint32_t compute_threadgroup_id;  /* ordinal34 */
4499 	uint32_t compute_relaunch;  /* ordinal35 */
4500 	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4501 	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4502 	uint32_t compute_wave_restore_control;  /* ordinal38 */
4503 	uint32_t reserved9;  /* ordinal39 */
4504 	uint32_t reserved10;  /* ordinal40 */
4505 	uint32_t reserved11;  /* ordinal41 */
4506 	uint32_t reserved12;  /* ordinal42 */
4507 	uint32_t reserved13;  /* ordinal43 */
4508 	uint32_t reserved14;  /* ordinal44 */
4509 	uint32_t reserved15;  /* ordinal45 */
4510 	uint32_t reserved16;  /* ordinal46 */
4511 	uint32_t reserved17;  /* ordinal47 */
4512 	uint32_t reserved18;  /* ordinal48 */
4513 	uint32_t reserved19;  /* ordinal49 */
4514 	uint32_t reserved20;  /* ordinal50 */
4515 	uint32_t reserved21;  /* ordinal51 */
4516 	uint32_t reserved22;  /* ordinal52 */
4517 	uint32_t reserved23;  /* ordinal53 */
4518 	uint32_t reserved24;  /* ordinal54 */
4519 	uint32_t reserved25;  /* ordinal55 */
4520 	uint32_t reserved26;  /* ordinal56 */
4521 	uint32_t reserved27;  /* ordinal57 */
4522 	uint32_t reserved28;  /* ordinal58 */
4523 	uint32_t reserved29;  /* ordinal59 */
4524 	uint32_t reserved30;  /* ordinal60 */
4525 	uint32_t reserved31;  /* ordinal61 */
4526 	uint32_t reserved32;  /* ordinal62 */
4527 	uint32_t reserved33;  /* ordinal63 */
4528 	uint32_t reserved34;  /* ordinal64 */
4529 	uint32_t compute_user_data_0;  /* ordinal65 */
4530 	uint32_t compute_user_data_1;  /* ordinal66 */
4531 	uint32_t compute_user_data_2;  /* ordinal67 */
4532 	uint32_t compute_user_data_3;  /* ordinal68 */
4533 	uint32_t compute_user_data_4;  /* ordinal69 */
4534 	uint32_t compute_user_data_5;  /* ordinal70 */
4535 	uint32_t compute_user_data_6;  /* ordinal71 */
4536 	uint32_t compute_user_data_7;  /* ordinal72 */
4537 	uint32_t compute_user_data_8;  /* ordinal73 */
4538 	uint32_t compute_user_data_9;  /* ordinal74 */
4539 	uint32_t compute_user_data_10;  /* ordinal75 */
4540 	uint32_t compute_user_data_11;  /* ordinal76 */
4541 	uint32_t compute_user_data_12;  /* ordinal77 */
4542 	uint32_t compute_user_data_13;  /* ordinal78 */
4543 	uint32_t compute_user_data_14;  /* ordinal79 */
4544 	uint32_t compute_user_data_15;  /* ordinal80 */
4545 	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4546 	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4547 	uint32_t reserved35;  /* ordinal83 */
4548 	uint32_t reserved36;  /* ordinal84 */
4549 	uint32_t reserved37;  /* ordinal85 */
4550 	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4551 	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4552 	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4553 	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4554 	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4555 	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4556 	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4557 	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4558 	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4559 	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4560 	uint32_t reserved38;  /* ordinal96 */
4561 	uint32_t reserved39;  /* ordinal97 */
4562 	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4563 	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4564 	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4565 	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4566 	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4567 	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4568 	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4569 	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4570 	uint32_t reserved40;  /* ordinal106 */
4571 	uint32_t reserved41;  /* ordinal107 */
4572 	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4573 	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4574 	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4575 	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4576 	uint32_t reserved42;  /* ordinal112 */
4577 	uint32_t reserved43;  /* ordinal113 */
4578 	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4579 	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4580 	uint32_t cp_packet_id_lo;  /* ordinal116 */
4581 	uint32_t cp_packet_id_hi;  /* ordinal117 */
4582 	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4583 	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4584 	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4585 	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4586 	uint32_t gds_save_mask_lo;  /* ordinal122 */
4587 	uint32_t gds_save_mask_hi;  /* ordinal123 */
4588 	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4589 	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4590 	uint32_t reserved44;  /* ordinal126 */
4591 	uint32_t reserved45;  /* ordinal127 */
4592 	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4593 	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4594 	uint32_t cp_hqd_active;  /* ordinal130 */
4595 	uint32_t cp_hqd_vmid;  /* ordinal131 */
4596 	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4597 	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4598 	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4599 	uint32_t cp_hqd_quantum;  /* ordinal135 */
4600 	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4601 	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4602 	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4603 	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4604 	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4605 	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4606 	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4607 	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4608 	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4609 	uint32_t cp_hqd_pq_control;  /* ordinal145 */
4610 	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4611 	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4612 	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4613 	uint32_t cp_hqd_ib_control;  /* ordinal149 */
4614 	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4615 	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4616 	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4617 	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4618 	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4619 	uint32_t cp_hqd_msg_type;  /* ordinal155 */
4620 	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4621 	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4622 	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4623 	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4624 	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4625 	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4626 	uint32_t cp_mqd_control;  /* ordinal162 */
4627 	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4628 	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4629 	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4630 	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4631 	uint32_t cp_hqd_eop_control;  /* ordinal167 */
4632 	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4633 	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4634 	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4635 	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4636 	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4637 	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4638 	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4639 	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4640 	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4641 	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4642 	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4643 	uint32_t cp_hqd_error;  /* ordinal179 */
4644 	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4645 	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4646 	uint32_t reserved46;  /* ordinal182 */
4647 	uint32_t reserved47;  /* ordinal183 */
4648 	uint32_t reserved48;  /* ordinal184 */
4649 	uint32_t reserved49;  /* ordinal185 */
4650 	uint32_t reserved50;  /* ordinal186 */
4651 	uint32_t reserved51;  /* ordinal187 */
4652 	uint32_t reserved52;  /* ordinal188 */
4653 	uint32_t reserved53;  /* ordinal189 */
4654 	uint32_t reserved54;  /* ordinal190 */
4655 	uint32_t reserved55;  /* ordinal191 */
4656 	uint32_t iqtimer_pkt_header;  /* ordinal192 */
4657 	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4658 	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4659 	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4660 	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4661 	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4662 	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4663 	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4664 	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4665 	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4666 	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4667 	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4668 	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4669 	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4670 	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4671 	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4672 	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4673 	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4674 	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4675 	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4676 	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4677 	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4678 	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4679 	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4680 	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4681 	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4682 	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4683 	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4684 	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4685 	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4686 	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4687 	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4688 	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4689 	uint32_t reserved56;  /* ordinal225 */
4690 	uint32_t reserved57;  /* ordinal226 */
4691 	uint32_t reserved58;  /* ordinal227 */
4692 	uint32_t set_resources_header;  /* ordinal228 */
4693 	uint32_t set_resources_dw1;  /* ordinal229 */
4694 	uint32_t set_resources_dw2;  /* ordinal230 */
4695 	uint32_t set_resources_dw3;  /* ordinal231 */
4696 	uint32_t set_resources_dw4;  /* ordinal232 */
4697 	uint32_t set_resources_dw5;  /* ordinal233 */
4698 	uint32_t set_resources_dw6;  /* ordinal234 */
4699 	uint32_t set_resources_dw7;  /* ordinal235 */
4700 	uint32_t reserved59;  /* ordinal236 */
4701 	uint32_t reserved60;  /* ordinal237 */
4702 	uint32_t reserved61;  /* ordinal238 */
4703 	uint32_t reserved62;  /* ordinal239 */
4704 	uint32_t reserved63;  /* ordinal240 */
4705 	uint32_t reserved64;  /* ordinal241 */
4706 	uint32_t reserved65;  /* ordinal242 */
4707 	uint32_t reserved66;  /* ordinal243 */
4708 	uint32_t reserved67;  /* ordinal244 */
4709 	uint32_t reserved68;  /* ordinal245 */
4710 	uint32_t reserved69;  /* ordinal246 */
4711 	uint32_t reserved70;  /* ordinal247 */
4712 	uint32_t reserved71;  /* ordinal248 */
4713 	uint32_t reserved72;  /* ordinal249 */
4714 	uint32_t reserved73;  /* ordinal250 */
4715 	uint32_t reserved74;  /* ordinal251 */
4716 	uint32_t reserved75;  /* ordinal252 */
4717 	uint32_t reserved76;  /* ordinal253 */
4718 	uint32_t reserved77;  /* ordinal254 */
4719 	uint32_t reserved78;  /* ordinal255 */
4720 
4721 	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4722 };
4723 
static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int i, r;

	/* Tear down the per-ring MQD buffer objects allocated by
	 * gfx_v8_0_cp_compute_resume(): reserve, unpin, unreserve, then
	 * drop the last reference.
	 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			/* NOTE(review): unpin/unreserve still run even when the
			 * reserve above failed — presumably best-effort teardown;
			 * confirm this is intended.
			 */
			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
		}
	}
}
4744 
/*
 * gfx_v8_0_cp_compute_resume - bring up the compute (MEC) queues.
 *
 * Programs the per-pipe EOP buffer registers, then for each compute ring:
 * allocates (if needed) and maps its MQD buffer object, fills in the MQD,
 * mirrors it into the CP_HQD_* registers under srbm_mutex, and activates
 * the queue.  Finally enables the compute CP and ring-tests each ring.
 *
 * Returns 0 on success or a negative error code on BO create/pin/map
 * failure (partially-created MQDs are freed via gfx_v8_0_cp_compute_fini).
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 live on MEC1, 4-7 on MEC2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait (up to usec_timeout us) for the HQD to drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* mark each ring ready, then confirm with a ring test */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
5003 
/*
 * gfx_v8_0_cp_resume - load CP microcode (if needed) and start all rings.
 *
 * When powerplay is disabled, firmware is loaded either directly by the
 * driver (legacy path) or via the SMU, in which case we only poll for
 * completion.  Topaz has no SMU MEC load path, so its compute microcode
 * is always loaded directly.  Returns 0 on success, negative error code
 * on firmware-load or ring-resume failure.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* APUs keep the GUI idle interrupt managed elsewhere */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just wait for each engine */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
										 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
5062 
5063 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5064 {
5065 	gfx_v8_0_cp_gfx_enable(adev, enable);
5066 	gfx_v8_0_cp_compute_enable(adev, enable);
5067 }
5068 
/*
 * gfx_v8_0_hw_init - IP-block hw_init hook for GFX.
 *
 * Applies the golden register settings, performs base GPU init, then
 * brings up the RLC and the command processors.  Returns 0 on success
 * or the first failing step's error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5085 
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini hook for GFX.
 *
 * Quiesces the block: drops our interrupt references, stops both
 * command processors and the RLC, frees the compute MQDs, and leaves
 * GFX power ungated.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* leave GFX ungated while the block is down */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5101 
/* Suspend is simply a hardware teardown of the GFX block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini((struct amdgpu_device *)handle);
}
5108 
/* Resume is simply a hardware re-init of the GFX block. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init((struct amdgpu_device *)handle);
}
5115 
5116 static bool gfx_v8_0_is_idle(void *handle)
5117 {
5118 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5119 
5120 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5121 		return false;
5122 	else
5123 		return true;
5124 }
5125 
5126 static int gfx_v8_0_wait_for_idle(void *handle)
5127 {
5128 	unsigned i;
5129 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5130 
5131 	for (i = 0; i < adev->usec_timeout; i++) {
5132 		if (gfx_v8_0_is_idle(handle))
5133 			return 0;
5134 
5135 		udelay(1);
5136 	}
5137 	return -ETIMEDOUT;
5138 }
5139 
/*
 * gfx_v8_0_check_soft_reset - decide whether GFX needs a soft reset.
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS busy bits and
 * records the resulting GRBM/SRBM soft-reset masks plus the per-block
 * hang flag in @adev for the pre/post soft-reset handlers to consume.
 * Always returns 0.
 */
static int gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* any pipeline/CP busy bit set -> reset CP + GFX (and GRBM) */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any CP frontend/compute/gfx engine busy -> reset all CP engines */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* publish the verdict for pre/soft/post reset handlers */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = true;
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
	} else {
		adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = false;
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
	}

	return 0;
}
5203 
/*
 * gfx_v8_0_inactive_hqd - request deactivation of a compute queue's HQD.
 *
 * Selects the ring's SRBM aperture; if the HQD is active, issues a
 * dequeue request (mode 2) and busy-waits up to adev->usec_timeout us
 * for CP_HQD_ACTIVE to clear.
 *
 * NOTE(review): the SRBM select is not restored to 0 before returning —
 * presumably a later vi_srbm_select() by the caller covers this; confirm.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}
5223 
5224 static int gfx_v8_0_pre_soft_reset(void *handle)
5225 {
5226 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5227 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5228 
5229 	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
5230 		return 0;
5231 
5232 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5233 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5234 
5235 	/* stop the rlc */
5236 	gfx_v8_0_rlc_stop(adev);
5237 
5238 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5239 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5240 		/* Disable GFX parsing/prefetching */
5241 		gfx_v8_0_cp_gfx_enable(adev, false);
5242 
5243 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5244 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5245 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5246 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5247 		int i;
5248 
5249 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5250 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5251 
5252 			gfx_v8_0_inactive_hqd(adev, ring);
5253 		}
5254 		/* Disable MEC parsing/prefetching */
5255 		gfx_v8_0_cp_compute_enable(adev, false);
5256 	}
5257 
5258        return 0;
5259 }
5260 
/*
 * gfx_v8_0_soft_reset - pulse the soft-reset bits computed earlier.
 *
 * Stalls/clears the GMCON path around the reset, then pulses the
 * recorded GRBM and SRBM soft-reset bits (set, settle ~50us, clear)
 * before releasing the GMCON stall.  The RREG32 after each WREG32
 * posts the write before the following delay.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX traffic through GMCON while resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5321 
/*
 * gfx_v8_0_init_hqd - reset a queue's HQD to a clean state.
 *
 * Clears the dequeue request and both ring pointers for @ring's HQD
 * (used after soft reset, before re-running compute resume), restoring
 * the SRBM select to 0 afterwards.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5331 
5332 static int gfx_v8_0_post_soft_reset(void *handle)
5333 {
5334 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5335 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5336 
5337 	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
5338 		return 0;
5339 
5340 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5341 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5342 
5343 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5344 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5345 		gfx_v8_0_cp_gfx_resume(adev);
5346 
5347 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5348 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5349 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5350 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5351 		int i;
5352 
5353 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5354 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5355 
5356 			gfx_v8_0_init_hqd(adev, ring);
5357 		}
5358 		gfx_v8_0_cp_compute_resume(adev);
5359 	}
5360 	gfx_v8_0_rlc_start(adev);
5361 
5362 	return 0;
5363 }
5364 
5365 /**
5366  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5367  *
5368  * @adev: amdgpu_device pointer
5369  *
5370  * Fetches a GPU clock counter snapshot.
5371  * Returns the 64 bit clock counter snapshot.
5372  */
5373 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5374 {
5375 	uint64_t clock;
5376 
5377 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5378 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5379 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5380 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5381 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5382 	return clock;
5383 }
5384 
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA setup for a VMID.
 *
 * Emits four WRITE_DATA packets that program the per-VMID GDS base/size,
 * GWS allocation and OA mask registers on the ring.  All values are
 * converted from byte units to the hardware's granularity via the
 * AMDGPU_*_SHIFT constants first.  Packet order and dword layout are
 * fixed by the PM4 WRITE_DATA format — do not reorder.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bit mask covering [oa_base, oa_base + oa_size) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5432 
/* GFX-block helper callbacks exported to the rest of the driver. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};
5437 
/*
 * gfx_v8_0_early_init - IP-block early_init hook.
 *
 * Records the ring counts and installs the gfx/ring/irq/gds/rlc
 * function tables for this ASIC generation.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5452 
/*
 * gfx_v8_0_late_init - IP-block late_init hook.
 *
 * Enables the privileged register/instruction fault interrupts, runs
 * the EDC GPR workaround (which needs the IB pool, hence late init),
 * then gates GFX power.  Returns 0 or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5476 
5477 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5478 						       bool enable)
5479 {
5480 	if (adev->asic_type == CHIP_POLARIS11)
5481 		/* Send msg to SMU via Powerplay */
5482 		amdgpu_set_powergating_state(adev,
5483 					     AMD_IP_BLOCK_TYPE_SMC,
5484 					     enable ?
5485 					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5486 
5487 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5488 }
5489 
/* Toggle dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5495 
/* Polaris11: toggle quick medium-grain power gating via RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5501 
/* Carrizo/Stoney: toggle coarse-grain GFX power gating via RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5507 
/* Carrizo/Stoney: toggle GFX pipeline power gating via RLC_PG_CNTL. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5517 
5518 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5519 					  bool enable)
5520 {
5521 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5522 		cz_enable_gfx_cg_power_gating(adev, true);
5523 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5524 			cz_enable_gfx_pipeline_power_gating(adev, true);
5525 	} else {
5526 		cz_enable_gfx_cg_power_gating(adev, false);
5527 		cz_enable_gfx_pipeline_power_gating(adev, false);
5528 	}
5529 }
5530 
5531 static int gfx_v8_0_set_powergating_state(void *handle,
5532 					  enum amd_powergating_state state)
5533 {
5534 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5535 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5536 
5537 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5538 		return 0;
5539 
5540 	switch (adev->asic_type) {
5541 	case CHIP_CARRIZO:
5542 	case CHIP_STONEY:
5543 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5544 			cz_update_gfx_cg_power_gating(adev, enable);
5545 
5546 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5547 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5548 		else
5549 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5550 
5551 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5552 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5553 		else
5554 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5555 		break;
5556 	case CHIP_POLARIS11:
5557 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5558 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5559 		else
5560 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5561 
5562 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5563 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5564 		else
5565 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5566 
5567 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5568 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5569 		else
5570 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5571 		break;
5572 	default:
5573 		break;
5574 	}
5575 
5576 	return 0;
5577 }
5578 
/* Broadcast a BPM serdes command to every CU/non-CU serdes master.
 * @reg_addr: BPM register address to target (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd:      command value (e.g. SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 *
 * Selects all SEs/SHs/CUs, then programs RLC_SERDES_WR_CTRL with the
 * command, target address and a broadcast BPM address.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast: select all shader engines/arrays/CUs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	/* Stoney keeps BPM_DATA/REG_ADDR bits intact while other ASICs clear
	 * them before OR-ing in the new values — presumably a hardware quirk;
	 * TODO confirm against the register spec.
	 */
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* 0xff in BPM_ADDR broadcasts the command to all BPMs */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5619 
5620 #define MSG_ENTER_RLC_SAFE_MODE     1
5621 #define MSG_EXIT_RLC_SAFE_MODE      0
5622 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5623 #define RLC_GPR_REG2__REQ__SHIFT 0
5624 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5625 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5626 
/* Ask the RLC firmware to enter safe mode (CZ/ST flavour, via the
 * RLC_GPR_REG2 mailbox).  Skipped when the RLC F32 core is halted or
 * when no CG/PG feature that needs safe mode is enabled.  Polls until
 * GFX reports clock+power on and the REQ bit has been acknowledged.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for GFX clock and power status to report "on" */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the request (REQ bit clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5662 
/* Counterpart of cz_enter_rlc_safe_mode: send the exit message through
 * the RLC_GPR_REG2 mailbox and wait for the REQ bit to clear.
 * NOTE(review): the final REQ poll runs even when no request was sent;
 * this looks harmless since REQ would already be clear — confirm.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to ack the request (REQ bit clears) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
5688 
/* Request RLC safe mode via the RLC_SAFE_MODE register (Iceland flavour).
 * Skipped when the RLC F32 core is halted or when neither CGCG nor MGCG
 * is enabled.  Polls until GFX reports clock+power on and the CMD bit
 * has been acknowledged by the RLC.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* MESSAGE=1 requests safe-mode entry */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clock and power status to report "on" */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the request (CMD bit clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5722 
/* Counterpart of iceland_enter_rlc_safe_mode: write CMD with MESSAGE=0
 * to leave safe mode, then wait for the CMD bit to clear.  Only acts
 * when we actually entered safe mode earlier (in_safe_mode tracking).
 * NOTE(review): the final CMD poll runs even when nothing was written;
 * this looks harmless since CMD would already be clear — confirm.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5747 
/* Stub for ASICs that need no RLC safe-mode handshake; just track state. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5752 
/* Stub counterpart of gfx_v8_0_nop_enter_rlc_safe_mode. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5757 
/* Safe-mode handshake via the RLC_GPR_REG2 mailbox (CZ/ST variant). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};

/* Safe-mode handshake via the RLC_SAFE_MODE register (Iceland variant). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

/* For ASICs that require no safe-mode handshake at all. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5772 
/* Enable/disable medium-grain clock gating (MGCG) together with the
 * related memory light-sleep (MGLS/RLC_LS/CP_LS) and tree-shade (CGTS)
 * features.  The whole numbered sequence runs under RLC safe mode, and
 * register writes are skipped when the value is already programmed
 * (the temp != data checks).
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5876 
/* Enable/disable coarse-grain clock gating (CGCG) and, when supported,
 * coarse-grain light sleep (CGLS).  Toggles the MGCG-override bits and
 * issues BPM serdes commands; the whole sequence runs under RLC safe
 * mode.  Register writes are skipped when the value is unchanged.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5967 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5968 					    bool enable)
5969 {
5970 	if (enable) {
5971 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5972 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5973 		 */
5974 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5975 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5976 	} else {
5977 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5978 		 * ===  CGCG + CGLS ===
5979 		 */
5980 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5981 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5982 	}
5983 	return 0;
5984 }
5985 
5986 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5987 					  enum amd_clockgating_state state)
5988 {
5989 	uint32_t msg_id, pp_state;
5990 	void *pp_handle = adev->powerplay.pp_handle;
5991 
5992 	if (state == AMD_CG_STATE_UNGATE)
5993 		pp_state = 0;
5994 	else
5995 		pp_state = PP_STATE_CG | PP_STATE_LS;
5996 
5997 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5998 			PP_BLOCK_GFX_CG,
5999 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6000 			pp_state);
6001 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6002 
6003 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6004 			PP_BLOCK_GFX_MG,
6005 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6006 			pp_state);
6007 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6008 
6009 	return 0;
6010 }
6011 
6012 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6013 					  enum amd_clockgating_state state)
6014 {
6015 	uint32_t msg_id, pp_state;
6016 	void *pp_handle = adev->powerplay.pp_handle;
6017 
6018 	if (state == AMD_CG_STATE_UNGATE)
6019 		pp_state = 0;
6020 	else
6021 		pp_state = PP_STATE_CG | PP_STATE_LS;
6022 
6023 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6024 			PP_BLOCK_GFX_CG,
6025 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6026 			pp_state);
6027 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6028 
6029 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6030 			PP_BLOCK_GFX_3D,
6031 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6032 			pp_state);
6033 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6034 
6035 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6036 			PP_BLOCK_GFX_MG,
6037 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6038 			pp_state);
6039 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6040 
6041 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6042 			PP_BLOCK_GFX_RLC,
6043 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6044 			pp_state);
6045 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6046 
6047 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6048 			PP_BLOCK_GFX_CP,
6049 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6050 			pp_state);
6051 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6052 
6053 	return 0;
6054 }
6055 
6056 static int gfx_v8_0_set_clockgating_state(void *handle,
6057 					  enum amd_clockgating_state state)
6058 {
6059 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6060 
6061 	switch (adev->asic_type) {
6062 	case CHIP_FIJI:
6063 	case CHIP_CARRIZO:
6064 	case CHIP_STONEY:
6065 		gfx_v8_0_update_gfx_clock_gating(adev,
6066 						 state == AMD_CG_STATE_GATE ? true : false);
6067 		break;
6068 	case CHIP_TONGA:
6069 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6070 		break;
6071 	case CHIP_POLARIS10:
6072 	case CHIP_POLARIS11:
6073 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6074 		break;
6075 	default:
6076 		break;
6077 	}
6078 	return 0;
6079 }
6080 
6081 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6082 {
6083 	return ring->adev->wb.wb[ring->rptr_offs];
6084 }
6085 
6086 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6087 {
6088 	struct amdgpu_device *adev = ring->adev;
6089 
6090 	if (ring->use_doorbell)
6091 		/* XXX check if swapping is necessary on BE */
6092 		return ring->adev->wb.wb[ring->wptr_offs];
6093 	else
6094 		return RREG32(mmCP_RB0_WPTR);
6095 }
6096 
6097 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6098 {
6099 	struct amdgpu_device *adev = ring->adev;
6100 
6101 	if (ring->use_doorbell) {
6102 		/* XXX check if swapping is necessary on BE */
6103 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
6104 		WDOORBELL32(ring->doorbell_index, ring->wptr);
6105 	} else {
6106 		WREG32(mmCP_RB0_WPTR, ring->wptr);
6107 		(void)RREG32(mmCP_RB0_WPTR);
6108 	}
6109 }
6110 
6111 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6112 {
6113 	u32 ref_and_mask, reg_mem_engine;
6114 
6115 	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
6116 		switch (ring->me) {
6117 		case 1:
6118 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6119 			break;
6120 		case 2:
6121 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6122 			break;
6123 		default:
6124 			return;
6125 		}
6126 		reg_mem_engine = 0;
6127 	} else {
6128 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6129 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6130 	}
6131 
6132 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6133 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6134 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6135 				 reg_mem_engine));
6136 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6137 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6138 	amdgpu_ring_write(ring, ref_and_mask);
6139 	amdgpu_ring_write(ring, ref_and_mask);
6140 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6141 }
6142 
/* Emit a WRITE_DATA packet storing 1 to mmHDP_DEBUG0, which invalidates
 * the HDP cache.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6154 
/* Schedule an indirect buffer on the gfx ring.  CE IBs use the CONST
 * packet variant; the VM id rides in bits 24+ of the control word and
 * the low word carries the dword length.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* swap bits for big-endian hosts */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6177 
/* Schedule an indirect buffer on a compute ring.  Same layout as the
 * gfx variant, but always a plain INDIRECT_BUFFER packet with the
 * VALID bit set in the control word.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				/* swap bits for big-endian hosts */
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6193 
/* Emit a fence on the gfx ring: an EVENT_WRITE_EOP packet that flushes
 * TC/TCL1 caches, writes @seq (32 or 64 bit per @flags) to @addr and
 * optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6214 
/* Emit a WAIT_REG_MEM that blocks the ring (PFP on gfx, ME on compute)
 * until the fence memory at the ring's sync address equals the latest
 * emitted sequence number.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6231 
/* Emit a VM flush: program the page-table base for @vm_id, request a TLB
 * invalidation via mmVM_INVALIDATE_REQUEST, and wait for it to finish.
 * On the gfx ring the sequence is bracketed with 128-dword NOP padding
 * and a PFP_SYNC_ME to keep the CE/PFP in step with the DE.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
	if (usepfp)
		amdgpu_ring_insert_nop(ring, 128);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 use separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6284 
6285 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6286 {
6287 	return ring->adev->wb.wb[ring->wptr_offs];
6288 }
6289 
/* Compute rings always use the doorbell: mirror the write pointer into
 * the writeback slot first, then ring the doorbell.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
6298 
/* Emit a fence on a compute ring: a RELEASE_MEM packet that flushes
 * TC/TCL1 caches, writes @seq (32 or 64 bit per @flags) to @addr and
 * optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6319 
/* Emit a SWITCH_BUFFER packet (payload is a single zero dword). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6325 
/* Emit a CONTEXT_CONTROL packet telling the CP which state groups to
 * (re)load, based on whether this submission performs a context switch
 * and whether a preamble IB is present.  The hex values are
 * CONTEXT_CONTROL load-enable bit fields.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6354 
static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
{
	/* gfx_v8_0_ring_emit_ib_gfx writes a fixed four dwords */
	return 4;
}
6360 
/* Worst-case number of ring dwords emitted per gfx submission frame,
 * excluding the IB packets themselves (used to size ring reservations).
 */
static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3; /* gfx_v8_ring_emit_cntxcntl */
}
6373 
static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
{
	/* gfx_v8_0_ring_emit_ib_compute writes a fixed four dwords */
	return 4;
}
6379 
/* Worst-case number of ring dwords emitted per compute submission frame,
 * excluding the IB packets themselves (used to size ring reservations).
 */
static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
}
6390 
6391 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6392 						 enum amdgpu_interrupt_state state)
6393 {
6394 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6395 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6396 }
6397 
6398 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6399 						     int me, int pipe,
6400 						     enum amdgpu_interrupt_state state)
6401 {
6402 	/*
6403 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6404 	 * handles the setting of interrupts for this specific pipe. All other
6405 	 * pipes' interrupts are set by amdkfd.
6406 	 */
6407 
6408 	if (me == 1) {
6409 		switch (pipe) {
6410 		case 0:
6411 			break;
6412 		default:
6413 			DRM_DEBUG("invalid pipe %d\n", pipe);
6414 			return;
6415 		}
6416 	} else {
6417 		DRM_DEBUG("invalid me %d\n", me);
6418 		return;
6419 	}
6420 
6421 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6422 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6423 }
6424 
6425 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6426 					     struct amdgpu_irq_src *source,
6427 					     unsigned type,
6428 					     enum amdgpu_interrupt_state state)
6429 {
6430 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6431 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6432 
6433 	return 0;
6434 }
6435 
6436 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6437 					      struct amdgpu_irq_src *source,
6438 					      unsigned type,
6439 					      enum amdgpu_interrupt_state state)
6440 {
6441 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6442 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6443 
6444 	return 0;
6445 }
6446 
/* Dispatch an EOP interrupt enable/disable request to the right ring:
 * the gfx ring, or one of the eight MEC pipes (me 1-2, pipe 0-3).
 * Unknown types are ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6485 
6486 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6487 			    struct amdgpu_irq_src *source,
6488 			    struct amdgpu_iv_entry *entry)
6489 {
6490 	int i;
6491 	u8 me_id, pipe_id, queue_id;
6492 	struct amdgpu_ring *ring;
6493 
6494 	DRM_DEBUG("IH: CP EOP\n");
6495 	me_id = (entry->ring_id & 0x0c) >> 2;
6496 	pipe_id = (entry->ring_id & 0x03) >> 0;
6497 	queue_id = (entry->ring_id & 0x70) >> 4;
6498 
6499 	switch (me_id) {
6500 	case 0:
6501 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6502 		break;
6503 	case 1:
6504 	case 2:
6505 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6506 			ring = &adev->gfx.compute_ring[i];
6507 			/* Per-queue interrupt is supported for MEC starting from VI.
6508 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6509 			  */
6510 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6511 				amdgpu_fence_process(ring);
6512 		}
6513 		break;
6514 	}
6515 	return 0;
6516 }
6517 
/* IRQ handler for privileged register access faults: a submitted command
 * stream touched a register it is not allowed to, so log the violation and
 * schedule an asynchronous GPU reset.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6526 
/* IRQ handler for privileged instruction faults: the CP hit an illegal
 * instruction in a command stream, so log it and schedule an asynchronous
 * GPU reset.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6535 
/* IP-block callback table wiring the GFX8 implementation into the amdgpu
 * IP framework (init/fini, suspend/resume, idle checks, soft reset and
 * clock/power gating control).
 */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6555 
/* Ring callback table for the GFX (graphics) ring.  parse_cs is NULL
 * because gfx8 IBs are submitted without kernel-side command-stream
 * parsing on this ring.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
};
6577 
/* Ring callback table for the compute (MEC) rings.  Mirrors the gfx table
 * but uses the compute-specific wptr/ib/fence emitters and omits the
 * gfx-only switch-buffer and context-control packets.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
};
6597 
6598 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6599 {
6600 	int i;
6601 
6602 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6603 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6604 
6605 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6606 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6607 }
6608 
/* End-of-pipe interrupt source: state programming + IH processing. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6613 
/* Privileged register fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6618 
/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6623 
6624 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6625 {
6626 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6627 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6628 
6629 	adev->gfx.priv_reg_irq.num_types = 1;
6630 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6631 
6632 	adev->gfx.priv_inst_irq.num_types = 1;
6633 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6634 }
6635 
6636 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6637 {
6638 	switch (adev->asic_type) {
6639 	case CHIP_TOPAZ:
6640 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6641 		break;
6642 	case CHIP_STONEY:
6643 	case CHIP_CARRIZO:
6644 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6645 		break;
6646 	default:
6647 		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6648 		break;
6649 	}
6650 }
6651 
/* Initialize ASIC GDS (global data share) sizing: read the total GDS
 * memory size from hardware and split memory/GWS/OA between the gfx and
 * CS partitions.  Partition sizes differ for 64KB-GDS parts.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		/* 64KB GDS parts get 4KB memory partitions. */
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6679 
6680 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6681 						 u32 bitmap)
6682 {
6683 	u32 data;
6684 
6685 	if (!bitmap)
6686 		return;
6687 
6688 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6689 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6690 
6691 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6692 }
6693 
6694 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6695 {
6696 	u32 data, mask;
6697 
6698 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6699 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6700 
6701 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6702 
6703 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6704 }
6705 
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the always-on CU mask.  Must run under
 * grbm_idx_mutex because it steers GRBM to each SE/SH in turn.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	/* Parse at most 4 SEs x 2 SHs of user-requested disable masks. */
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* Steer register reads/writes to this SE/SH pair. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* Apply user CU-disable requests (only the first
			 * 4x2 SE/SH pairs have parsed masks).
			 */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first 2 per SH are marked
			 * "always on" (ao).
			 */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* NOTE(review): packs ao bits at (i*16 + j*8) — for
			 * j > 1 SE slots would overlap; presumably fine for
			 * the SE/SH counts gfx8 parts actually have — verify.
			 */
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* Restore broadcast steering before releasing the mutex. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6748