/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM register addresses */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

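/* GDS base/size and GWS/OA allocation registers, indexed by VMID */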
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

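/*
 * The "golden" register tables below are {register, AND mask, value}
 * triplets: amdgpu_program_register_sequence() clears the masked bits
 * of each register and ORs in the new value.
 */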
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);

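/* Program the per-ASIC golden register settings defined above */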
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

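/* Mark all driver-managed CP scratch registers as free */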
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
		adev->gfx.scratch.free[i] = true;
		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
	}
}

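/*
 * Ring sanity test: emit a write of 0xDEADBEEF to a scratch register
 * through the ring and poll until the CP has executed it.
 */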
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

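/*
 * IB sanity test: submit a small indirect buffer that writes 0xDEADBEEF
 * to a scratch register, then wait on its fence and verify the value.
 */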
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = (scratch - PACKET3_SET_UCONFIG_REG_START);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

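/* Drop all CP/RLC firmware images and the cached RLC register list */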
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

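/*
 * Request and validate the PFP/ME/CE/RLC/MEC(2) microcode for the current
 * ASIC, caching firmware versions and the RLC save/restore list data.
 */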
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.smu_load) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

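/*
 * Emit the clear-state buffer (CSB): a PM4 stream, built from the
 * vi_cs_data tables, that the RLC uses to restore default context state.
 */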
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

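/*
 * Concatenate the CE/PFP/ME/MEC(2) microcode jump tables into the RLC
 * cp_table buffer allocated for Carrizo/Stoney.
 */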
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

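/* Tear down the RLC clear-state and CP jump-table buffer objects */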
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

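/*
 * Allocate, pin and fill the RLC buffers in VRAM: the clear-state buffer
 * for all ASICs, plus the CP jump table (and GDS backup area) on
 * Carrizo and Stoney.
 */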
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

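/* Tear down the MEC HPD EOP buffer object */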
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

#define MEC_HPD_SIZE 2048

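/*
 * Allocate and zero the HPD EOP area in GTT for the single MEC pipe the
 * driver owns (see the comment below about KFD).
 */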
1361 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1362 {
1363 	int r;
1364 	u32 *hpd;
1365 
1366 	/*
1367 	 * we assign only 1 pipe because all other pipes will
1368 	 * be handled by KFD
1369 	 */
1370 	adev->gfx.mec.num_mec = 1;
1371 	adev->gfx.mec.num_pipe = 1;
1372 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1373 
1374 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1375 		r = amdgpu_bo_create(adev,
1376 				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1377 				     PAGE_SIZE, true,
1378 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1379 				     &adev->gfx.mec.hpd_eop_obj);
1380 		if (r) {
1381 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1382 			return r;
1383 		}
1384 	}
1385 
1386 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1387 	if (unlikely(r != 0)) {
1388 		gfx_v8_0_mec_fini(adev);
1389 		return r;
1390 	}
1391 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1392 			  &adev->gfx.mec.hpd_eop_gpu_addr);
1393 	if (r) {
1394 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1395 		gfx_v8_0_mec_fini(adev);
1396 		return r;
1397 	}
1398 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1399 	if (r) {
1400 		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1401 		gfx_v8_0_mec_fini(adev);
1402 		return r;
1403 	}
1404 
1405 	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1406 
1407 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1408 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1409 
1410 	return 0;
1411 }
1412 
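/*
 * Raw GCN3 machine code for the GPR init shaders below: sequences of
 * v_mov_b32/s_mov_b32 moves that write known data across the VGPR/SGPR
 * file, terminated by s_barrier (0xbf8a0000) and s_endpgm (0xbf810000).
 */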
1413 static const u32 vgpr_init_compute_shader[] =
1414 {
1415 	0x7e000209, 0x7e020208,
1416 	0x7e040207, 0x7e060206,
1417 	0x7e080205, 0x7e0a0204,
1418 	0x7e0c0203, 0x7e0e0202,
1419 	0x7e100201, 0x7e120200,
1420 	0x7e140209, 0x7e160208,
1421 	0x7e180207, 0x7e1a0206,
1422 	0x7e1c0205, 0x7e1e0204,
1423 	0x7e200203, 0x7e220202,
1424 	0x7e240201, 0x7e260200,
1425 	0x7e280209, 0x7e2a0208,
1426 	0x7e2c0207, 0x7e2e0206,
1427 	0x7e300205, 0x7e320204,
1428 	0x7e340203, 0x7e360202,
1429 	0x7e380201, 0x7e3a0200,
1430 	0x7e3c0209, 0x7e3e0208,
1431 	0x7e400207, 0x7e420206,
1432 	0x7e440205, 0x7e460204,
1433 	0x7e480203, 0x7e4a0202,
1434 	0x7e4c0201, 0x7e4e0200,
1435 	0x7e500209, 0x7e520208,
1436 	0x7e540207, 0x7e560206,
1437 	0x7e580205, 0x7e5a0204,
1438 	0x7e5c0203, 0x7e5e0202,
1439 	0x7e600201, 0x7e620200,
1440 	0x7e640209, 0x7e660208,
1441 	0x7e680207, 0x7e6a0206,
1442 	0x7e6c0205, 0x7e6e0204,
1443 	0x7e700203, 0x7e720202,
1444 	0x7e740201, 0x7e760200,
1445 	0x7e780209, 0x7e7a0208,
1446 	0x7e7c0207, 0x7e7e0206,
1447 	0xbf8a0000, 0xbf810000,
1448 };
1449 
1450 static const u32 sgpr_init_compute_shader[] =
1451 {
1452 	0xbe8a0100, 0xbe8c0102,
1453 	0xbe8e0104, 0xbe900106,
1454 	0xbe920108, 0xbe940100,
1455 	0xbe960102, 0xbe980104,
1456 	0xbe9a0106, 0xbe9c0108,
1457 	0xbe9e0100, 0xbea00102,
1458 	0xbea20104, 0xbea40106,
1459 	0xbea60108, 0xbea80100,
1460 	0xbeaa0102, 0xbeac0104,
1461 	0xbeae0106, 0xbeb00108,
1462 	0xbeb20100, 0xbeb40102,
1463 	0xbeb60104, 0xbeb80106,
1464 	0xbeba0108, 0xbebc0100,
1465 	0xbebe0102, 0xbec00104,
1466 	0xbec20106, 0xbec40108,
1467 	0xbec60100, 0xbec80102,
1468 	0xbee60004, 0xbee70005,
1469 	0xbeea0006, 0xbeeb0007,
1470 	0xbee80008, 0xbee90009,
1471 	0xbefc0000, 0xbf8a0000,
1472 	0xbf810000, 0x00000000,
1473 };
1474 
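/*
 * Register/value pairs, consumed two at a time and written with
 * SET_SH_REG packets before each init dispatch below.
 */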
1475 static const u32 vgpr_init_regs[] =
1476 {
1477 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1478 	mmCOMPUTE_RESOURCE_LIMITS, 0,
1479 	mmCOMPUTE_NUM_THREAD_X, 256*4,
1480 	mmCOMPUTE_NUM_THREAD_Y, 1,
1481 	mmCOMPUTE_NUM_THREAD_Z, 1,
1482 	mmCOMPUTE_PGM_RSRC2, 20,
1483 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1484 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1485 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1486 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1487 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1488 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1489 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1490 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1491 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1492 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1493 };
1494 
1495 static const u32 sgpr1_init_regs[] =
1496 {
1497 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1498 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1499 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1500 	mmCOMPUTE_NUM_THREAD_Y, 1,
1501 	mmCOMPUTE_NUM_THREAD_Z, 1,
1502 	mmCOMPUTE_PGM_RSRC2, 20,
1503 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1504 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1505 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1506 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1507 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1508 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1509 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1510 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1511 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1512 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1513 };
1514 
1515 static const u32 sgpr2_init_regs[] =
1516 {
1517 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1518 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1519 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1520 	mmCOMPUTE_NUM_THREAD_Y, 1,
1521 	mmCOMPUTE_NUM_THREAD_Z, 1,
1522 	mmCOMPUTE_PGM_RSRC2, 20,
1523 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1524 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1525 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1526 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1527 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1528 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1529 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1530 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1531 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1532 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1533 };
1534 
1535 static const u32 sec_ded_counter_registers[] =
1536 {
1537 	mmCPC_EDC_ATC_CNT,
1538 	mmCPC_EDC_SCRATCH_CNT,
1539 	mmCPC_EDC_UCODE_CNT,
1540 	mmCPF_EDC_ATC_CNT,
1541 	mmCPF_EDC_ROQ_CNT,
1542 	mmCPF_EDC_TAG_CNT,
1543 	mmCPG_EDC_ATC_CNT,
1544 	mmCPG_EDC_DMA_CNT,
1545 	mmCPG_EDC_TAG_CNT,
1546 	mmDC_EDC_CSINVOC_CNT,
1547 	mmDC_EDC_RESTORE_CNT,
1548 	mmDC_EDC_STATE_CNT,
1549 	mmGDS_EDC_CNT,
1550 	mmGDS_EDC_GRBM_CNT,
1551 	mmGDS_EDC_OA_DED,
1552 	mmSPI_EDC_CNT,
1553 	mmSQC_ATC_EDC_GATCL1_CNT,
1554 	mmSQC_EDC_CNT,
1555 	mmSQ_EDC_DED_CNT,
1556 	mmSQ_EDC_INFO,
1557 	mmSQ_EDC_SEC_CNT,
1558 	mmTCC_EDC_CNT,
1559 	mmTCP_ATC_EDC_GATCL1_CNT,
1560 	mmTCP_EDC_CNT,
1561 	mmTD_EDC_CNT
1562 };
1563 
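/*
 * Carrizo-only workaround: dispatch the init shaders above to write
 * known data into every VGPR/SGPR so the SEC/DED (ECC) logic starts
 * from a defined state, then re-enable EDC and read the counter
 * registers back to clear them.
 */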
1564 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1565 {
1566 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1567 	struct amdgpu_ib ib;
1568 	struct dma_fence *f = NULL;
1569 	int r, i;
1570 	u32 tmp;
1571 	unsigned total_size, vgpr_offset, sgpr_offset;
1572 	u64 gpu_addr;
1573 
1574 	/* only supported on CZ */
1575 	if (adev->asic_type != CHIP_CARRIZO)
1576 		return 0;
1577 
1578 	/* bail if the compute ring is not ready */
1579 	if (!ring->ready)
1580 		return 0;
1581 
1582 	tmp = RREG32(mmGB_EDC_MODE);
1583 	WREG32(mmGB_EDC_MODE, 0);
1584 
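	/*
	 * Per dispatch: each reg/value pair costs a 3-dword SET_SH_REG
	 * packet, +4 dwords for COMPUTE_PGM_LO/HI, +5 for DISPATCH_DIRECT,
	 * +2 for the EVENT_WRITE flush; the final x4 converts dwords to
	 * bytes.
	 */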
1585 	total_size =
1586 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1587 	total_size +=
1588 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1589 	total_size +=
1590 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1591 	total_size = ALIGN(total_size, 256);
1592 	vgpr_offset = total_size;
1593 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1594 	sgpr_offset = total_size;
1595 	total_size += sizeof(sgpr_init_compute_shader);
1596 
1597 	/* allocate an indirect buffer to put the commands in */
1598 	memset(&ib, 0, sizeof(ib));
1599 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1600 	if (r) {
1601 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1602 		return r;
1603 	}
1604 
1605 	/* load the compute shaders */
1606 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1607 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1608 
1609 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1610 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1611 
1612 	/* init the ib length to 0 */
1613 	ib.length_dw = 0;
1614 
1615 	/* VGPR */
1616 	/* write the register state for the compute dispatch */
1617 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1618 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1619 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1620 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1621 	}
1622 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
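	/* COMPUTE_PGM_LO/HI take the 256-byte-aligned address, hence >> 8 */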
1623 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1624 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1625 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1626 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1627 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1628 
1629 	/* write dispatch packet */
1630 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1631 	ib.ptr[ib.length_dw++] = 8; /* x */
1632 	ib.ptr[ib.length_dw++] = 1; /* y */
1633 	ib.ptr[ib.length_dw++] = 1; /* z */
1634 	ib.ptr[ib.length_dw++] =
1635 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1636 
1637 	/* write CS partial flush packet */
1638 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1639 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1640 
1641 	/* SGPR1 */
1642 	/* write the register state for the compute dispatch */
1643 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1644 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1645 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1646 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1647 	}
1648 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1649 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1650 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1651 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1652 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1653 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1654 
1655 	/* write dispatch packet */
1656 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1657 	ib.ptr[ib.length_dw++] = 8; /* x */
1658 	ib.ptr[ib.length_dw++] = 1; /* y */
1659 	ib.ptr[ib.length_dw++] = 1; /* z */
1660 	ib.ptr[ib.length_dw++] =
1661 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1662 
1663 	/* write CS partial flush packet */
1664 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1665 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1666 
1667 	/* SGPR2 */
1668 	/* write the register state for the compute dispatch */
1669 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1670 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1671 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1672 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1673 	}
1674 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1675 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1676 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1677 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1678 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1679 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1680 
1681 	/* write dispatch packet */
1682 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1683 	ib.ptr[ib.length_dw++] = 8; /* x */
1684 	ib.ptr[ib.length_dw++] = 1; /* y */
1685 	ib.ptr[ib.length_dw++] = 1; /* z */
1686 	ib.ptr[ib.length_dw++] =
1687 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1688 
1689 	/* write CS partial flush packet */
1690 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1691 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1692 
1693 	/* schedule the ib on the ring */
1694 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1695 	if (r) {
1696 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1697 		goto fail;
1698 	}
1699 
1700 	/* wait for the GPU to finish processing the IB */
1701 	r = dma_fence_wait(f, false);
1702 	if (r) {
1703 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1704 		goto fail;
1705 	}
1706 
1707 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1708 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1709 	WREG32(mmGB_EDC_MODE, tmp);
1710 
1711 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1712 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1713 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1714 
1715 
1716 	/* read back registers to clear the counters */
1717 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1718 		RREG32(sec_ded_counter_registers[i]);
1719 
1720 fail:
1721 	amdgpu_ib_free(adev, &ib, NULL);
1722 	dma_fence_put(f);
1723 
1724 	return r;
1725 }
1726 
1727 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1728 {
1729 	u32 gb_addr_config;
1730 	u32 mc_shared_chmap, mc_arb_ramcfg;
1731 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1732 	u32 tmp;
1733 	int ret;
1734 
1735 	switch (adev->asic_type) {
1736 	case CHIP_TOPAZ:
1737 		adev->gfx.config.max_shader_engines = 1;
1738 		adev->gfx.config.max_tile_pipes = 2;
1739 		adev->gfx.config.max_cu_per_sh = 6;
1740 		adev->gfx.config.max_sh_per_se = 1;
1741 		adev->gfx.config.max_backends_per_se = 2;
1742 		adev->gfx.config.max_texture_channel_caches = 2;
1743 		adev->gfx.config.max_gprs = 256;
1744 		adev->gfx.config.max_gs_threads = 32;
1745 		adev->gfx.config.max_hw_contexts = 8;
1746 
1747 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1748 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1749 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1750 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1751 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1752 		break;
1753 	case CHIP_FIJI:
1754 		adev->gfx.config.max_shader_engines = 4;
1755 		adev->gfx.config.max_tile_pipes = 16;
1756 		adev->gfx.config.max_cu_per_sh = 16;
1757 		adev->gfx.config.max_sh_per_se = 1;
1758 		adev->gfx.config.max_backends_per_se = 4;
1759 		adev->gfx.config.max_texture_channel_caches = 16;
1760 		adev->gfx.config.max_gprs = 256;
1761 		adev->gfx.config.max_gs_threads = 32;
1762 		adev->gfx.config.max_hw_contexts = 8;
1763 
1764 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1765 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1766 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1767 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1768 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1769 		break;
1770 	case CHIP_POLARIS11:
1771 		ret = amdgpu_atombios_get_gfx_info(adev);
1772 		if (ret)
1773 			return ret;
1774 		adev->gfx.config.max_gprs = 256;
1775 		adev->gfx.config.max_gs_threads = 32;
1776 		adev->gfx.config.max_hw_contexts = 8;
1777 
1778 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1779 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1780 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1781 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1782 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1783 		break;
1784 	case CHIP_POLARIS10:
1785 		ret = amdgpu_atombios_get_gfx_info(adev);
1786 		if (ret)
1787 			return ret;
1788 		adev->gfx.config.max_gprs = 256;
1789 		adev->gfx.config.max_gs_threads = 32;
1790 		adev->gfx.config.max_hw_contexts = 8;
1791 
1792 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1793 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1794 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1795 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1796 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1797 		break;
1798 	case CHIP_TONGA:
1799 		adev->gfx.config.max_shader_engines = 4;
1800 		adev->gfx.config.max_tile_pipes = 8;
1801 		adev->gfx.config.max_cu_per_sh = 8;
1802 		adev->gfx.config.max_sh_per_se = 1;
1803 		adev->gfx.config.max_backends_per_se = 2;
1804 		adev->gfx.config.max_texture_channel_caches = 8;
1805 		adev->gfx.config.max_gprs = 256;
1806 		adev->gfx.config.max_gs_threads = 32;
1807 		adev->gfx.config.max_hw_contexts = 8;
1808 
1809 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1810 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1811 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1812 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1813 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1814 		break;
1815 	case CHIP_CARRIZO:
1816 		adev->gfx.config.max_shader_engines = 1;
1817 		adev->gfx.config.max_tile_pipes = 2;
1818 		adev->gfx.config.max_sh_per_se = 1;
1819 		adev->gfx.config.max_backends_per_se = 2;
1820 
1821 		switch (adev->pdev->revision) {
1822 		case 0xc4:
1823 		case 0x84:
1824 		case 0xc8:
1825 		case 0xcc:
1826 		case 0xe1:
1827 		case 0xe3:
1828 			/* B10 */
1829 			adev->gfx.config.max_cu_per_sh = 8;
1830 			break;
1831 		case 0xc5:
1832 		case 0x81:
1833 		case 0x85:
1834 		case 0xc9:
1835 		case 0xcd:
1836 		case 0xe2:
1837 		case 0xe4:
1838 			/* B8 */
1839 			adev->gfx.config.max_cu_per_sh = 6;
1840 			break;
1841 		case 0xc6:
1842 		case 0xca:
1843 		case 0xce:
1844 		case 0x88:
1845 			/* B6 */
1846 			adev->gfx.config.max_cu_per_sh = 6;
1847 			break;
1848 		case 0xc7:
1849 		case 0x87:
1850 		case 0xcb:
1851 		case 0xe5:
1852 		case 0x89:
1853 		default:
1854 			/* B4 */
1855 			adev->gfx.config.max_cu_per_sh = 4;
1856 			break;
1857 		}
1858 
1859 		adev->gfx.config.max_texture_channel_caches = 2;
1860 		adev->gfx.config.max_gprs = 256;
1861 		adev->gfx.config.max_gs_threads = 32;
1862 		adev->gfx.config.max_hw_contexts = 8;
1863 
1864 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1868 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1869 		break;
1870 	case CHIP_STONEY:
1871 		adev->gfx.config.max_shader_engines = 1;
1872 		adev->gfx.config.max_tile_pipes = 2;
1873 		adev->gfx.config.max_sh_per_se = 1;
1874 		adev->gfx.config.max_backends_per_se = 1;
1875 
1876 		switch (adev->pdev->revision) {
1877 		case 0xc0:
1878 		case 0xc1:
1879 		case 0xc2:
1880 		case 0xc4:
1881 		case 0xc8:
1882 		case 0xc9:
1883 			adev->gfx.config.max_cu_per_sh = 3;
1884 			break;
1885 		case 0xd0:
1886 		case 0xd1:
1887 		case 0xd2:
1888 		default:
1889 			adev->gfx.config.max_cu_per_sh = 2;
1890 			break;
1891 		}
1892 
1893 		adev->gfx.config.max_texture_channel_caches = 2;
1894 		adev->gfx.config.max_gprs = 256;
1895 		adev->gfx.config.max_gs_threads = 16;
1896 		adev->gfx.config.max_hw_contexts = 8;
1897 
1898 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1902 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1903 		break;
1904 	default:
1905 		adev->gfx.config.max_shader_engines = 2;
1906 		adev->gfx.config.max_tile_pipes = 4;
1907 		adev->gfx.config.max_cu_per_sh = 2;
1908 		adev->gfx.config.max_sh_per_se = 1;
1909 		adev->gfx.config.max_backends_per_se = 2;
1910 		adev->gfx.config.max_texture_channel_caches = 4;
1911 		adev->gfx.config.max_gprs = 256;
1912 		adev->gfx.config.max_gs_threads = 32;
1913 		adev->gfx.config.max_hw_contexts = 8;
1914 
1915 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1916 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1917 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1918 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1919 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1920 		break;
1921 	}
1922 
1923 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1924 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1925 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1926 
1927 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1928 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1929 	if (adev->flags & AMD_IS_APU) {
1930 		/* Get memory bank mapping mode. */
1931 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1932 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1933 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1934 
1935 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1936 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1937 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1938 
1939 		/* Validate settings in case only one DIMM is installed. */
1940 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1941 			dimm00_addr_map = 0;
1942 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1943 			dimm01_addr_map = 0;
1944 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1945 			dimm10_addr_map = 0;
1946 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1947 			dimm11_addr_map = 0;
1948 
1949 		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1950 		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1951 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1952 			adev->gfx.config.mem_row_size_in_kb = 2;
1953 		else
1954 			adev->gfx.config.mem_row_size_in_kb = 1;
1955 	} else {
1956 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
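		/*
		 * Row size = 4 bytes << (8 + NOOFCOLS):
		 * NOOFCOLS 0 -> 1KB, 1 -> 2KB, 2 -> 4KB (the cap below).
		 */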
1957 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1958 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1959 			adev->gfx.config.mem_row_size_in_kb = 4;
1960 	}
1961 
1962 	adev->gfx.config.shader_engine_tile_size = 32;
1963 	adev->gfx.config.num_gpus = 1;
1964 	adev->gfx.config.multi_gpu_tile_size = 64;
1965 
1966 	/* fix up row size */
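	/* GB_ADDR_CONFIG.ROW_SIZE encoding: 0 = 1KB, 1 = 2KB, 2 = 4KB */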
1967 	switch (adev->gfx.config.mem_row_size_in_kb) {
1968 	case 1:
1969 	default:
1970 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1971 		break;
1972 	case 2:
1973 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1974 		break;
1975 	case 4:
1976 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1977 		break;
1978 	}
1979 	adev->gfx.config.gb_addr_config = gb_addr_config;
1980 
1981 	return 0;
1982 }
1983 
1984 static int gfx_v8_0_sw_init(void *handle)
1985 {
1986 	int i, r;
1987 	struct amdgpu_ring *ring;
1988 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1989 
1990 	/* EOP Event */
1991 	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1992 	if (r)
1993 		return r;
1994 
1995 	/* Privileged reg */
1996 	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1997 	if (r)
1998 		return r;
1999 
2000 	/* Privileged inst */
2001 	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2002 	if (r)
2003 		return r;
2004 
2005 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2006 
2007 	gfx_v8_0_scratch_init(adev);
2008 
2009 	r = gfx_v8_0_init_microcode(adev);
2010 	if (r) {
2011 		DRM_ERROR("Failed to load gfx firmware!\n");
2012 		return r;
2013 	}
2014 
2015 	r = gfx_v8_0_rlc_init(adev);
2016 	if (r) {
2017 		DRM_ERROR("Failed to init rlc BOs!\n");
2018 		return r;
2019 	}
2020 
2021 	r = gfx_v8_0_mec_init(adev);
2022 	if (r) {
2023 		DRM_ERROR("Failed to init MEC BOs!\n");
2024 		return r;
2025 	}
2026 
2027 	/* set up the gfx ring */
2028 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2029 		ring = &adev->gfx.gfx_ring[i];
2030 		ring->ring_obj = NULL;
2031 		sprintf(ring->name, "gfx");
2032 		/* no gfx doorbells on iceland */
2033 		if (adev->asic_type != CHIP_TOPAZ) {
2034 			ring->use_doorbell = true;
2035 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2036 		}
2037 
2038 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2039 				     AMDGPU_CP_IRQ_GFX_EOP);
2040 		if (r)
2041 			return r;
2042 	}
2043 
2044 	/* set up the compute queues */
2045 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2046 		unsigned irq_type;
2047 
2048 		/* max 32 queues per MEC */
2049 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2050 			DRM_ERROR("Too many (%d) compute rings!\n", i);
2051 			break;
2052 		}
2053 		ring = &adev->gfx.compute_ring[i];
2054 		ring->ring_obj = NULL;
2055 		ring->use_doorbell = true;
2056 		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2057 		ring->me = 1; /* first MEC */
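		/* 8 queues per pipe on MEC1: ring i -> pipe i / 8, queue i % 8 */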
2058 		ring->pipe = i / 8;
2059 		ring->queue = i % 8;
2060 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2061 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2062 		/* type-2 packets are deprecated on MEC, use type-3 instead */
2063 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2064 				     irq_type);
2065 		if (r)
2066 			return r;
2067 	}
2068 
2069 	/* reserve GDS, GWS and OA resources for gfx */
2070 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2071 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2072 				    &adev->gds.gds_gfx_bo, NULL, NULL);
2073 	if (r)
2074 		return r;
2075 
2076 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2077 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2078 				    &adev->gds.gws_gfx_bo, NULL, NULL);
2079 	if (r)
2080 		return r;
2081 
2082 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2083 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2084 				    &adev->gds.oa_gfx_bo, NULL, NULL);
2085 	if (r)
2086 		return r;
2087 
2088 	adev->gfx.ce_ram_size = 0x8000; /* 32 KB of CE RAM */
2089 
2090 	r = gfx_v8_0_gpu_early_init(adev);
2091 	if (r)
2092 		return r;
2093 
2094 	return 0;
2095 }
2096 
2097 static int gfx_v8_0_sw_fini(void *handle)
2098 {
2099 	int i;
2100 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2101 
2102 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2103 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2104 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2105 
2106 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2107 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2108 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2109 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2110 
2111 	gfx_v8_0_mec_fini(adev);
2112 	gfx_v8_0_rlc_fini(adev);
2113 	gfx_v8_0_free_microcode(adev);
2114 
2115 	return 0;
2116 }
2117 
2118 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2119 {
2120 	uint32_t *modearray, *mod2array;
2121 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2122 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2123 	u32 reg_offset;
2124 
2125 	modearray = adev->gfx.config.tile_mode_array;
2126 	mod2array = adev->gfx.config.macrotile_mode_array;
2127 
2128 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2129 		modearray[reg_offset] = 0;
2130 
2131 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2132 		mod2array[reg_offset] = 0;
2133 
2134 	switch (adev->asic_type) {
2135 	case CHIP_TOPAZ:
2136 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2137 				PIPE_CONFIG(ADDR_SURF_P2) |
2138 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2139 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2140 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2141 				PIPE_CONFIG(ADDR_SURF_P2) |
2142 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2143 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2144 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145 				PIPE_CONFIG(ADDR_SURF_P2) |
2146 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2147 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149 				PIPE_CONFIG(ADDR_SURF_P2) |
2150 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2151 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153 				PIPE_CONFIG(ADDR_SURF_P2) |
2154 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2155 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2157 				PIPE_CONFIG(ADDR_SURF_P2) |
2158 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2159 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2161 				PIPE_CONFIG(ADDR_SURF_P2) |
2162 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2163 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2165 				PIPE_CONFIG(ADDR_SURF_P2));
2166 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2167 				PIPE_CONFIG(ADDR_SURF_P2) |
2168 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2169 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2170 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171 				 PIPE_CONFIG(ADDR_SURF_P2) |
2172 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2174 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175 				 PIPE_CONFIG(ADDR_SURF_P2) |
2176 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2177 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2178 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2179 				 PIPE_CONFIG(ADDR_SURF_P2) |
2180 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2181 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183 				 PIPE_CONFIG(ADDR_SURF_P2) |
2184 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2185 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2187 				 PIPE_CONFIG(ADDR_SURF_P2) |
2188 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2191 				 PIPE_CONFIG(ADDR_SURF_P2) |
2192 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2193 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2194 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2195 				 PIPE_CONFIG(ADDR_SURF_P2) |
2196 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2198 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2199 				 PIPE_CONFIG(ADDR_SURF_P2) |
2200 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2201 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2202 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2203 				 PIPE_CONFIG(ADDR_SURF_P2) |
2204 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2205 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2206 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2207 				 PIPE_CONFIG(ADDR_SURF_P2) |
2208 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2209 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2211 				 PIPE_CONFIG(ADDR_SURF_P2) |
2212 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2215 				 PIPE_CONFIG(ADDR_SURF_P2) |
2216 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2217 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2219 				 PIPE_CONFIG(ADDR_SURF_P2) |
2220 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2223 				 PIPE_CONFIG(ADDR_SURF_P2) |
2224 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2227 				 PIPE_CONFIG(ADDR_SURF_P2) |
2228 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2229 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2230 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 				 PIPE_CONFIG(ADDR_SURF_P2) |
2232 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2233 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2234 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2235 				 PIPE_CONFIG(ADDR_SURF_P2) |
2236 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2237 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2238 
2239 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2240 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2241 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2242 				NUM_BANKS(ADDR_SURF_8_BANK));
2243 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2244 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246 				NUM_BANKS(ADDR_SURF_8_BANK));
2247 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2248 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2249 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250 				NUM_BANKS(ADDR_SURF_8_BANK));
2251 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2254 				NUM_BANKS(ADDR_SURF_8_BANK));
2255 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2257 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 				NUM_BANKS(ADDR_SURF_8_BANK));
2259 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2261 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262 				NUM_BANKS(ADDR_SURF_8_BANK));
2263 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2265 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266 				NUM_BANKS(ADDR_SURF_8_BANK));
2267 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2268 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2269 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270 				NUM_BANKS(ADDR_SURF_16_BANK));
2271 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2272 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2273 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274 				NUM_BANKS(ADDR_SURF_16_BANK));
2275 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2276 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2277 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2278 				 NUM_BANKS(ADDR_SURF_16_BANK));
2279 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2280 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2281 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282 				 NUM_BANKS(ADDR_SURF_16_BANK));
2283 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2284 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2285 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286 				 NUM_BANKS(ADDR_SURF_16_BANK));
2287 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2289 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290 				 NUM_BANKS(ADDR_SURF_16_BANK));
2291 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2293 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2294 				 NUM_BANKS(ADDR_SURF_8_BANK));
2295 
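		/*
		 * Offsets 7, 12, 17 and 23 were never populated in
		 * modearray above for Iceland, so skip writing them.
		 */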
2296 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2297 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2298 			    reg_offset != 23)
2299 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2300 
2301 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2302 			if (reg_offset != 7)
2303 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2304 
2305 		break;
2306 	case CHIP_FIJI:
2307 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2310 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2311 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2314 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2315 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2318 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2322 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2326 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2328 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2330 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2332 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2334 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2336 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2337 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2340 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2341 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2344 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2348 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2349 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2353 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2354 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2357 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2358 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2366 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2374 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2377 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2378 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2381 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2382 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2384 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2385 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2386 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2388 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2390 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2394 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2398 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2399 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2402 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2404 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2406 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2410 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2416 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2420 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2425 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2427 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2429 
2430 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2432 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2433 				NUM_BANKS(ADDR_SURF_8_BANK));
2434 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2436 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2437 				NUM_BANKS(ADDR_SURF_8_BANK));
2438 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441 				NUM_BANKS(ADDR_SURF_8_BANK));
2442 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445 				NUM_BANKS(ADDR_SURF_8_BANK));
2446 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2448 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2449 				NUM_BANKS(ADDR_SURF_8_BANK));
2450 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2452 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2453 				NUM_BANKS(ADDR_SURF_8_BANK));
2454 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457 				NUM_BANKS(ADDR_SURF_8_BANK));
2458 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2460 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461 				NUM_BANKS(ADDR_SURF_8_BANK));
2462 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2464 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465 				NUM_BANKS(ADDR_SURF_8_BANK));
2466 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469 				 NUM_BANKS(ADDR_SURF_8_BANK));
2470 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473 				 NUM_BANKS(ADDR_SURF_8_BANK));
2474 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2476 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477 				 NUM_BANKS(ADDR_SURF_8_BANK));
2478 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481 				 NUM_BANKS(ADDR_SURF_8_BANK));
2482 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485 				 NUM_BANKS(ADDR_SURF_4_BANK));
2486 
2487 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2488 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2489 
2490 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2491 			if (reg_offset != 7)
2492 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2493 
2494 		break;
2495 	case CHIP_TONGA:
2496 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2498 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2499 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2500 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2503 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2504 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2507 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2508 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2511 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2515 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2517 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2519 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2523 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2525 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2526 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2527 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2529 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2530 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2531 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2533 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2534 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2537 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2538 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2539 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2541 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2542 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2544 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2546 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2549 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2550 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2554 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2555 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2558 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2559 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2562 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2563 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2564 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2566 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2567 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2570 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2571 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2573 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2574 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2575 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2577 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2578 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2579 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2581 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2583 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2587 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2588 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2591 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2593 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2595 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2599 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2601 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2605 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2607 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2609 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2611 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2614 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2615 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2616 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2618 
2619 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2622 				NUM_BANKS(ADDR_SURF_16_BANK));
2623 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2625 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2626 				NUM_BANKS(ADDR_SURF_16_BANK));
2627 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2630 				NUM_BANKS(ADDR_SURF_16_BANK));
2631 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634 				NUM_BANKS(ADDR_SURF_16_BANK));
2635 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2637 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2638 				NUM_BANKS(ADDR_SURF_16_BANK));
2639 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642 				NUM_BANKS(ADDR_SURF_16_BANK));
2643 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2646 				NUM_BANKS(ADDR_SURF_16_BANK));
2647 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2649 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2650 				NUM_BANKS(ADDR_SURF_16_BANK));
2651 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2653 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2654 				NUM_BANKS(ADDR_SURF_16_BANK));
2655 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2657 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2658 				 NUM_BANKS(ADDR_SURF_16_BANK));
2659 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2662 				 NUM_BANKS(ADDR_SURF_16_BANK));
2663 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2665 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2666 				 NUM_BANKS(ADDR_SURF_8_BANK));
2667 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2670 				 NUM_BANKS(ADDR_SURF_4_BANK));
2671 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2674 				 NUM_BANKS(ADDR_SURF_4_BANK));
2675 
2676 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2677 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2678 
2679 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2680 			if (reg_offset != 7)
2681 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2682 
2683 		break;
2684 	case CHIP_POLARIS11:
2685 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2688 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2692 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2696 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2700 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2704 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2708 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2710 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2712 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2714 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2716 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2718 				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2719 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2722 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2726 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2730 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2731 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2734 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2744 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2756 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2760 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2768 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2772 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2776 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2780 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2782 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2784 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2788 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2794 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2798 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2802 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2806 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807 
2808 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2810 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811 				NUM_BANKS(ADDR_SURF_16_BANK));
2812 
2813 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816 				NUM_BANKS(ADDR_SURF_16_BANK));
2817 
2818 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2820 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2821 				NUM_BANKS(ADDR_SURF_16_BANK));
2822 
2823 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2825 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2826 				NUM_BANKS(ADDR_SURF_16_BANK));
2827 
2828 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831 				NUM_BANKS(ADDR_SURF_16_BANK));
2832 
2833 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836 				NUM_BANKS(ADDR_SURF_16_BANK));
2837 
2838 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2840 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2841 				NUM_BANKS(ADDR_SURF_16_BANK));
2842 
2843 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2844 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2845 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846 				NUM_BANKS(ADDR_SURF_16_BANK));
2847 
2848 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851 				NUM_BANKS(ADDR_SURF_16_BANK));
2852 
2853 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856 				NUM_BANKS(ADDR_SURF_16_BANK));
2857 
2858 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2860 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2861 				NUM_BANKS(ADDR_SURF_16_BANK));
2862 
2863 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2864 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2865 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2866 				NUM_BANKS(ADDR_SURF_16_BANK));
2867 
2868 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2871 				NUM_BANKS(ADDR_SURF_8_BANK));
2872 
2873 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2875 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2876 				NUM_BANKS(ADDR_SURF_4_BANK));
2877 
2878 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2879 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2880 
2881 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2882 			if (reg_offset != 7)
2883 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2884 
2885 		break;
2886 	case CHIP_POLARIS10:
2887 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2890 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2894 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2898 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2902 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2906 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2910 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2914 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2918 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2920 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2921 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2924 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2928 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2946 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2958 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2962 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2970 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2974 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2978 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2979 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2986 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2990 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3000 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3004 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009 
3010 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013 				NUM_BANKS(ADDR_SURF_16_BANK));
3014 
3015 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018 				NUM_BANKS(ADDR_SURF_16_BANK));
3019 
3020 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023 				NUM_BANKS(ADDR_SURF_16_BANK));
3024 
3025 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3027 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028 				NUM_BANKS(ADDR_SURF_16_BANK));
3029 
3030 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3032 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3033 				NUM_BANKS(ADDR_SURF_16_BANK));
3034 
3035 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3038 				NUM_BANKS(ADDR_SURF_16_BANK));
3039 
3040 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3043 				NUM_BANKS(ADDR_SURF_16_BANK));
3044 
3045 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3047 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048 				NUM_BANKS(ADDR_SURF_16_BANK));
3049 
3050 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3052 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053 				NUM_BANKS(ADDR_SURF_16_BANK));
3054 
3055 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3057 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3058 				NUM_BANKS(ADDR_SURF_16_BANK));
3059 
3060 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3063 				NUM_BANKS(ADDR_SURF_16_BANK));
3064 
3065 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3068 				NUM_BANKS(ADDR_SURF_8_BANK));
3069 
3070 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3072 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3073 				NUM_BANKS(ADDR_SURF_4_BANK));
3074 
3075 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3078 				NUM_BANKS(ADDR_SURF_4_BANK));
3079 
3080 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3081 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3082 
3083 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3084 			if (reg_offset != 7)
3085 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3086 
3087 		break;
3088 	case CHIP_STONEY:
3089 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090 				PIPE_CONFIG(ADDR_SURF_P2) |
3091 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3092 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094 				PIPE_CONFIG(ADDR_SURF_P2) |
3095 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3096 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098 				PIPE_CONFIG(ADDR_SURF_P2) |
3099 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3100 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3102 				PIPE_CONFIG(ADDR_SURF_P2) |
3103 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3104 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106 				PIPE_CONFIG(ADDR_SURF_P2) |
3107 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3108 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3110 				PIPE_CONFIG(ADDR_SURF_P2) |
3111 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3112 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114 				PIPE_CONFIG(ADDR_SURF_P2) |
3115 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3116 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3118 				PIPE_CONFIG(ADDR_SURF_P2));
3119 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3120 				PIPE_CONFIG(ADDR_SURF_P2) |
3121 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3122 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124 				 PIPE_CONFIG(ADDR_SURF_P2) |
3125 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3126 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128 				 PIPE_CONFIG(ADDR_SURF_P2) |
3129 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3130 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3132 				 PIPE_CONFIG(ADDR_SURF_P2) |
3133 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136 				 PIPE_CONFIG(ADDR_SURF_P2) |
3137 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3140 				 PIPE_CONFIG(ADDR_SURF_P2) |
3141 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144 				 PIPE_CONFIG(ADDR_SURF_P2) |
3145 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3148 				 PIPE_CONFIG(ADDR_SURF_P2) |
3149 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3152 				 PIPE_CONFIG(ADDR_SURF_P2) |
3153 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3156 				 PIPE_CONFIG(ADDR_SURF_P2) |
3157 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3160 				 PIPE_CONFIG(ADDR_SURF_P2) |
3161 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3164 				 PIPE_CONFIG(ADDR_SURF_P2) |
3165 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3168 				 PIPE_CONFIG(ADDR_SURF_P2) |
3169 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3172 				 PIPE_CONFIG(ADDR_SURF_P2) |
3173 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3176 				 PIPE_CONFIG(ADDR_SURF_P2) |
3177 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3180 				 PIPE_CONFIG(ADDR_SURF_P2) |
3181 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3182 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3184 				 PIPE_CONFIG(ADDR_SURF_P2) |
3185 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3186 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3188 				 PIPE_CONFIG(ADDR_SURF_P2) |
3189 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3190 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3191 
3192 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195 				NUM_BANKS(ADDR_SURF_8_BANK));
3196 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3198 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199 				NUM_BANKS(ADDR_SURF_8_BANK));
3200 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203 				NUM_BANKS(ADDR_SURF_8_BANK));
3204 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3206 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3207 				NUM_BANKS(ADDR_SURF_8_BANK));
3208 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211 				NUM_BANKS(ADDR_SURF_8_BANK));
3212 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215 				NUM_BANKS(ADDR_SURF_8_BANK));
3216 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219 				NUM_BANKS(ADDR_SURF_8_BANK));
3220 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3221 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3222 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223 				NUM_BANKS(ADDR_SURF_16_BANK));
3224 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3225 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3226 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227 				NUM_BANKS(ADDR_SURF_16_BANK));
3228 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3229 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3230 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231 				 NUM_BANKS(ADDR_SURF_16_BANK));
3232 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3234 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235 				 NUM_BANKS(ADDR_SURF_16_BANK));
3236 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3238 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239 				 NUM_BANKS(ADDR_SURF_16_BANK));
3240 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3241 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3242 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243 				 NUM_BANKS(ADDR_SURF_16_BANK));
3244 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3247 				 NUM_BANKS(ADDR_SURF_8_BANK));
3248 
3249 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3250 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3251 			    reg_offset != 23)
3252 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3253 
3254 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3255 			if (reg_offset != 7)
3256 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3257 
3258 		break;
3259 	default:
3260 		dev_warn(adev->dev,
3261 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3262 			 adev->asic_type);
3263 
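		/* deliberate fall through: reuse the CHIP_CARRIZO tables */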
3264 	case CHIP_CARRIZO:
3265 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3266 				PIPE_CONFIG(ADDR_SURF_P2) |
3267 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3268 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3270 				PIPE_CONFIG(ADDR_SURF_P2) |
3271 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3272 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3273 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3274 				PIPE_CONFIG(ADDR_SURF_P2) |
3275 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3276 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3277 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3278 				PIPE_CONFIG(ADDR_SURF_P2) |
3279 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3280 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3281 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3282 				PIPE_CONFIG(ADDR_SURF_P2) |
3283 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3284 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3286 				PIPE_CONFIG(ADDR_SURF_P2) |
3287 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3288 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3290 				PIPE_CONFIG(ADDR_SURF_P2) |
3291 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3292 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3294 				PIPE_CONFIG(ADDR_SURF_P2));
3295 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3296 				PIPE_CONFIG(ADDR_SURF_P2) |
3297 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3298 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3299 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3300 				 PIPE_CONFIG(ADDR_SURF_P2) |
3301 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3302 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3303 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304 				 PIPE_CONFIG(ADDR_SURF_P2) |
3305 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3306 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3307 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3308 				 PIPE_CONFIG(ADDR_SURF_P2) |
3309 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3310 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3312 				 PIPE_CONFIG(ADDR_SURF_P2) |
3313 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3314 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3315 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3316 				 PIPE_CONFIG(ADDR_SURF_P2) |
3317 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3318 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3320 				 PIPE_CONFIG(ADDR_SURF_P2) |
3321 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3322 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3323 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3324 				 PIPE_CONFIG(ADDR_SURF_P2) |
3325 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3328 				 PIPE_CONFIG(ADDR_SURF_P2) |
3329 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3332 				 PIPE_CONFIG(ADDR_SURF_P2) |
3333 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3334 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3336 				 PIPE_CONFIG(ADDR_SURF_P2) |
3337 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3338 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3339 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3340 				 PIPE_CONFIG(ADDR_SURF_P2) |
3341 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3342 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3344 				 PIPE_CONFIG(ADDR_SURF_P2) |
3345 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3348 				 PIPE_CONFIG(ADDR_SURF_P2) |
3349 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3352 				 PIPE_CONFIG(ADDR_SURF_P2) |
3353 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3356 				 PIPE_CONFIG(ADDR_SURF_P2) |
3357 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3358 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3359 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3360 				 PIPE_CONFIG(ADDR_SURF_P2) |
3361 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3362 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3363 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3364 				 PIPE_CONFIG(ADDR_SURF_P2) |
3365 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3366 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3367 
3368 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3370 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371 				NUM_BANKS(ADDR_SURF_8_BANK));
3372 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3374 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375 				NUM_BANKS(ADDR_SURF_8_BANK));
3376 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3379 				NUM_BANKS(ADDR_SURF_8_BANK));
3380 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3382 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3383 				NUM_BANKS(ADDR_SURF_8_BANK));
3384 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3387 				NUM_BANKS(ADDR_SURF_8_BANK));
3388 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3390 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3391 				NUM_BANKS(ADDR_SURF_8_BANK));
3392 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3395 				NUM_BANKS(ADDR_SURF_8_BANK));
3396 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3397 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3398 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3399 				NUM_BANKS(ADDR_SURF_16_BANK));
3400 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3401 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3402 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3403 				NUM_BANKS(ADDR_SURF_16_BANK));
3404 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3405 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3406 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407 				 NUM_BANKS(ADDR_SURF_16_BANK));
3408 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3409 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3410 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411 				 NUM_BANKS(ADDR_SURF_16_BANK));
3412 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3414 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3415 				 NUM_BANKS(ADDR_SURF_16_BANK));
3416 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3417 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3418 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419 				 NUM_BANKS(ADDR_SURF_16_BANK));
3420 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3421 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3422 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3423 				 NUM_BANKS(ADDR_SURF_8_BANK));
3424 
3425 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3426 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3427 			    reg_offset != 23)
3428 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3429 
3430 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3431 			if (reg_offset != 7)
3432 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3433 
3434 		break;
3435 	}
3436 }
3437 
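/*
 * Program GRBM_GFX_INDEX to steer subsequent indexed register accesses.
 * Passing 0xffffffff for se_num, sh_num or instance requests broadcast
 * writes at that level instead of selecting a single index.
 */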
3438 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3439 				  u32 se_num, u32 sh_num, u32 instance)
3440 {
3441 	u32 data;
3442 
3443 	if (instance == 0xffffffff)
3444 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3445 	else
3446 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3447 
3448 	if (se_num == 0xffffffff)
3449 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3450 	else
3451 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3452 
3453 	if (sh_num == 0xffffffff)
3454 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3455 	else
3456 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3457 
3458 	WREG32(mmGRBM_GFX_INDEX, data);
3459 }
3460 
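/*
 * Build a mask of the low bit_width bits. The shift is done in 64-bit
 * arithmetic so that bit_width == 32 yields 0xffffffff rather than an
 * undefined 32-bit shift.
 */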
3461 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3462 {
3463 	return (u32)((1ULL << bit_width) - 1);
3464 }
3465 
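/*
 * Return a bitmap of the render backends that are active in the
 * currently selected SE/SH: the hard (CC) and soft (GC_USER) disable
 * masks are OR'ed together, inverted, and trimmed to the per-SH width.
 */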
3466 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3467 {
3468 	u32 data, mask;
3469 
3470 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3471 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3472 
3473 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3474 
3475 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3476 				       adev->gfx.config.max_sh_per_se);
3477 
3478 	return (~data) & mask;
3479 }
3480 
3481 static void
3482 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3483 {
3484 	switch (adev->asic_type) {
3485 	case CHIP_FIJI:
3486 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3487 			  RB_XSEL2(1) | PKR_MAP(2) |
3488 			  PKR_XSEL(1) | PKR_YSEL(1) |
3489 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3490 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3491 			   SE_PAIR_YSEL(2);
3492 		break;
3493 	case CHIP_TONGA:
3494 	case CHIP_POLARIS10:
3495 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3496 			  SE_XSEL(1) | SE_YSEL(1);
3497 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3498 			   SE_PAIR_YSEL(2);
3499 		break;
3500 	case CHIP_TOPAZ:
3501 	case CHIP_CARRIZO:
3502 		*rconf |= RB_MAP_PKR0(2);
3503 		*rconf1 |= 0x0;
3504 		break;
3505 	case CHIP_POLARIS11:
3506 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3507 			  SE_XSEL(1) | SE_YSEL(1);
3508 		*rconf1 |= 0x0;
3509 		break;
3510 	case CHIP_STONEY:
3511 		*rconf |= 0x0;
3512 		*rconf1 |= 0x0;
3513 		break;
3514 	default:
3515 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3516 		break;
3517 	}
3518 }
3519 
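/*
 * On parts with harvested (fused-off) render backends the default
 * raster config would reference dead RBs, so rewrite the SE, PKR and
 * RB map fields per shader engine to point only at backends present
 * in rb_mask.
 */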
3520 static void
3521 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3522 					u32 raster_config, u32 raster_config_1,
3523 					unsigned rb_mask, unsigned num_rb)
3524 {
3525 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3526 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3527 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3528 	unsigned rb_per_se = num_rb / num_se;
3529 	unsigned se_mask[4];
3530 	unsigned se;
3531 
3532 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3533 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3534 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3535 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3536 
3537 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3538 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3539 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3540 
3541 	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3542 			     (!se_mask[2] && !se_mask[3]))) {
3543 		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3544 
3545 		if (!se_mask[0] && !se_mask[1]) {
3546 			raster_config_1 |=
3547 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3548 		} else {
3549 			raster_config_1 |=
3550 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3551 		}
3552 	}
3553 
3554 	for (se = 0; se < num_se; se++) {
3555 		unsigned raster_config_se = raster_config;
3556 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3557 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3558 		int idx = (se / 2) * 2;
3559 
3560 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3561 			raster_config_se &= ~SE_MAP_MASK;
3562 
3563 			if (!se_mask[idx]) {
3564 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3565 			} else {
3566 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3567 			}
3568 		}
3569 
3570 		pkr0_mask &= rb_mask;
3571 		pkr1_mask &= rb_mask;
3572 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3573 			raster_config_se &= ~PKR_MAP_MASK;
3574 
3575 			if (!pkr0_mask) {
3576 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3577 			} else {
3578 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3579 			}
3580 		}
3581 
3582 		if (rb_per_se >= 2) {
3583 			unsigned rb0_mask = 1 << (se * rb_per_se);
3584 			unsigned rb1_mask = rb0_mask << 1;
3585 
3586 			rb0_mask &= rb_mask;
3587 			rb1_mask &= rb_mask;
3588 			if (!rb0_mask || !rb1_mask) {
3589 				raster_config_se &= ~RB_MAP_PKR0_MASK;
3590 
3591 				if (!rb0_mask) {
3592 					raster_config_se |=
3593 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3594 				} else {
3595 					raster_config_se |=
3596 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3597 				}
3598 			}
3599 
3600 			if (rb_per_se > 2) {
3601 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3602 				rb1_mask = rb0_mask << 1;
3603 				rb0_mask &= rb_mask;
3604 				rb1_mask &= rb_mask;
3605 				if (!rb0_mask || !rb1_mask) {
3606 					raster_config_se &= ~RB_MAP_PKR1_MASK;
3607 
3608 					if (!rb0_mask) {
3609 						raster_config_se |=
3610 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3611 					} else {
3612 						raster_config_se |=
3613 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3614 					}
3615 				}
3616 			}
3617 		}
3618 
3619 		/* GRBM_GFX_INDEX has a different offset on VI */
3620 		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3621 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3622 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3623 	}
3624 
3625 	/* GRBM_GFX_INDEX has a different offset on VI */
3626 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3627 }
3628 
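/*
 * Discover the active render backends across all SE/SH pairs, cache
 * the result for userspace queries, and program PA_SC_RASTER_CONFIG
 * and PA_SC_RASTER_CONFIG_1 either globally or per-SE when some
 * backends are harvested.
 */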
3629 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3630 {
3631 	int i, j;
3632 	u32 data;
3633 	u32 raster_config = 0, raster_config_1 = 0;
3634 	u32 active_rbs = 0;
3635 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3636 					adev->gfx.config.max_sh_per_se;
3637 	unsigned num_rb_pipes;
3638 
3639 	mutex_lock(&adev->grbm_idx_mutex);
3640 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3641 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3642 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3643 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3644 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3645 					       rb_bitmap_width_per_sh);
3646 		}
3647 	}
3648 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3649 
3650 	adev->gfx.config.backend_enable_mask = active_rbs;
3651 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3652 
3653 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3654 			     adev->gfx.config.max_shader_engines, 16);
3655 
3656 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3657 
3658 	if (!adev->gfx.config.backend_enable_mask ||
3659 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3660 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3661 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3662 	} else {
3663 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3664 							adev->gfx.config.backend_enable_mask,
3665 							num_rb_pipes);
3666 	}
3667 
3668 	/* cache the values for userspace */
3669 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3670 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3671 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3672 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3673 				RREG32(mmCC_RB_BACKEND_DISABLE);
3674 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3675 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3676 			adev->gfx.config.rb_config[i][j].raster_config =
3677 				RREG32(mmPA_SC_RASTER_CONFIG);
3678 			adev->gfx.config.rb_config[i][j].raster_config_1 =
3679 				RREG32(mmPA_SC_RASTER_CONFIG_1);
3680 		}
3681 	}
3682 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3683 	mutex_unlock(&adev->grbm_idx_mutex);
3684 }
3685 
3686 /**
3687  * gfx_v8_0_init_compute_vmid - init compute vmid
3688  *
3689  * @adev: amdgpu_device pointer
3690  *
3691  * Initialize compute vmid sh_mem registers
3692  *
3693  */
3694 #define DEFAULT_SH_MEM_BASES	(0x6000)
3695 #define FIRST_COMPUTE_VMID	(8)
3696 #define LAST_COMPUTE_VMID	(16)
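/* VMIDs 8-15 are reserved for the compute side (e.g. amdkfd); VMIDs 0-7
 * stay with graphics.
 */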
3697 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3698 {
3699 	int i;
3700 	uint32_t sh_mem_config;
3701 	uint32_t sh_mem_bases;
3702 
3703 	/*
3704 	 * Configure apertures:
3705 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3706 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3707 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3708 	 */
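	/* both 16-bit halves of SH_MEM_BASES (the private and shared aperture
	 * bases) get 0x6000, which selects the 0x6000_0000_0000_0000 window
	 * described above
	 */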
3709 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3710 
3711 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3712 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3713 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3714 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3715 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3716 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3717 
3718 	mutex_lock(&adev->srbm_mutex);
3719 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3720 		vi_srbm_select(adev, 0, 0, 0, i);
3721 		/* CP and shaders */
3722 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3723 		WREG32(mmSH_MEM_APE1_BASE, 1);
3724 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3725 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3726 	}
3727 	vi_srbm_select(adev, 0, 0, 0, 0);
3728 	mutex_unlock(&adev->srbm_mutex);
3729 }
3730 
3731 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3732 {
3733 	u32 tmp;
3734 	int i;
3735 
3736 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3737 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3738 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3739 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3740 
3741 	gfx_v8_0_tiling_mode_table_init(adev);
3742 	gfx_v8_0_setup_rb(adev);
3743 	gfx_v8_0_get_cu_info(adev);
3744 
3745 	/* XXX SH_MEM regs */
3746 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3747 	mutex_lock(&adev->srbm_mutex);
3748 	for (i = 0; i < 16; i++) {
3749 		vi_srbm_select(adev, 0, 0, 0, i);
3750 		/* CP and shaders */
3751 		if (i == 0) {
3752 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3753 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3754 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3755 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3756 			WREG32(mmSH_MEM_CONFIG, tmp);
3757 		} else {
3758 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3759 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3760 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3761 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3762 			WREG32(mmSH_MEM_CONFIG, tmp);
3763 		}
3764 
3765 		WREG32(mmSH_MEM_APE1_BASE, 1);
3766 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3767 		WREG32(mmSH_MEM_BASES, 0);
3768 	}
3769 	vi_srbm_select(adev, 0, 0, 0, 0);
3770 	mutex_unlock(&adev->srbm_mutex);
3771 
3772 	gfx_v8_0_init_compute_vmid(adev);
3773 
3774 	mutex_lock(&adev->grbm_idx_mutex);
3775 	/*
3776 	 * make sure that the following register writes are broadcast
3777 	 * to all of the shaders
3778 	 */
3779 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3780 
3781 	WREG32(mmPA_SC_FIFO_SIZE,
3782 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3783 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3784 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3785 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3786 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3787 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3788 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3789 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3790 	mutex_unlock(&adev->grbm_idx_mutex);
3791 
3792 }
3793 
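/*
 * Wait for the RLC serdes masters to go idle: first poll the CU
 * masters for each SE/SH pair, then the non-CU masters globally.
 * Every poll is bounded by adev->usec_timeout.
 */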
3794 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3795 {
3796 	u32 i, j, k;
3797 	u32 mask;
3798 
3799 	mutex_lock(&adev->grbm_idx_mutex);
3800 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3801 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3802 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3803 			for (k = 0; k < adev->usec_timeout; k++) {
3804 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3805 					break;
3806 				udelay(1);
3807 			}
3808 		}
3809 	}
3810 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3811 	mutex_unlock(&adev->grbm_idx_mutex);
3812 
3813 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3814 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3815 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3816 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3817 	for (k = 0; k < adev->usec_timeout; k++) {
3818 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3819 			break;
3820 		udelay(1);
3821 	}
3822 }
3823 
3824 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3825 					       bool enable)
3826 {
3827 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3828 
3829 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3830 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3831 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3832 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3833 
3834 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3835 }
3836 
3837 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3838 {
3839 	/* program the clear state indirect buffer (csib) address and size */
3840 	WREG32(mmRLC_CSIB_ADDR_HI,
3841 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3842 	WREG32(mmRLC_CSIB_ADDR_LO,
3843 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3844 	WREG32(mmRLC_CSIB_LENGTH,
3845 			adev->gfx.rlc.clear_state_size);
3846 }
3847 
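/*
 * Walk the RLC indirect register list and build two tables for the
 * save/restore machine: the starting offset of each entry and the set
 * of unique index values. Entries end with an 0xFFFFFFFF sentinel; the
 * two words skipped before each index value are assumed here to be the
 * entry's register offset and data count. Each index in the list is
 * replaced in place by its position in the unique table.
 */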
3848 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3849 				int ind_offset,
3850 				int list_size,
3851 				int *unique_indices,
3852 				int *indices_count,
3853 				int max_indices,
3854 				int *ind_start_offsets,
3855 				int *offset_count,
3856 				int max_offset)
3857 {
3858 	int indices;
3859 	bool new_entry = true;
3860 
3861 	for (; ind_offset < list_size; ind_offset++) {
3862 
3863 		if (new_entry) {
3864 			new_entry = false;
3865 			ind_start_offsets[*offset_count] = ind_offset;
3866 			*offset_count = *offset_count + 1;
3867 			BUG_ON(*offset_count >= max_offset);
3868 		}
3869 
3870 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3871 			new_entry = true;
3872 			continue;
3873 		}
3874 
3875 		ind_offset += 2;
3876 
3877 		/* look for a matching index */
3878 		for (indices = 0;
3879 			indices < *indices_count;
3880 			indices++) {
3881 			if (unique_indices[indices] ==
3882 				register_list_format[ind_offset])
3883 				break;
3884 		}
3885 
3886 		if (indices >= *indices_count) {
3887 			unique_indices[*indices_count] =
3888 				register_list_format[ind_offset];
3889 			indices = *indices_count;
3890 			*indices_count = *indices_count + 1;
3891 			BUG_ON(*indices_count >= max_indices);
3892 		}
3893 
3894 		register_list_format[ind_offset] = indices;
3895 	}
3896 }
3897 
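/*
 * Program the RLC save/restore machine: stream the direct register
 * list into ARAM, the re-indexed indirect list and its metadata into
 * GPM scratch, and load each unique index into the paired
 * RLC_SRM_INDEX_CNTL ADDR/DATA registers.
 */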
3898 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3899 {
3900 	int i, temp, data;
3901 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3902 	int indices_count = 0;
3903 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3904 	int offset_count = 0;
3905 
3906 	int list_size;
3907 	unsigned int *register_list_format =
3908 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3909 	if (!register_list_format)
3910 		return -ENOMEM;
3911 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3912 			adev->gfx.rlc.reg_list_format_size_bytes);
3913 
3914 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3915 				RLC_FormatDirectRegListLength,
3916 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3917 				unique_indices,
3918 				&indices_count,
3919 				ARRAY_SIZE(unique_indices),
3920 				indirect_start_offsets,
3921 				&offset_count,
3922 				ARRAY_SIZE(indirect_start_offsets));
3923 
3924 	/* save and restore list */
3925 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3926 
3927 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3928 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3929 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3930 
3931 	/* indirect list */
3932 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3933 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3934 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3935 
3936 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3937 	list_size = list_size >> 1; /* the list holds register/value pairs */
3938 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3939 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3940 
3941 	/* starting offsets of the indirect entries */
3942 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3943 		adev->gfx.rlc.starting_offsets_start);
3944 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3945 		WREG32(mmRLC_GPM_SCRATCH_DATA,
3946 				indirect_start_offsets[i]);
3947 
3948 	/* unique indices */
3949 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3950 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3951 	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3952 		if (unique_indices[i] != 0) {
3953 			amdgpu_mm_wreg(adev, temp + i,
3954 					unique_indices[i] & 0x3FFFF, false);
3955 			amdgpu_mm_wreg(adev, data + i,
3956 					unique_indices[i] >> 20, false);
3957 		}
3958 	}
3959 	kfree(register_list_format);
3960 
3961 	return 0;
3962 }
3963 
3964 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3965 {
3966 	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3967 }
3968 
3969 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3970 {
3971 	uint32_t data;
3972 
3973 	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3974 
3975 	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3976 	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3977 	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3978 	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3979 	WREG32(mmRLC_PG_DELAY, data);
3980 
3981 	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3982 	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3983 
3984 }
3985 
3986 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3987 						bool enable)
3988 {
3989 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3990 }
3991 
3992 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3993 						  bool enable)
3994 {
3995 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3996 }
3997 
3998 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3999 {
4000 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4001 }
4002 
4003 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4004 {
4005 	if ((adev->asic_type == CHIP_CARRIZO) ||
4006 	    (adev->asic_type == CHIP_STONEY)) {
4007 		gfx_v8_0_init_csb(adev);
4008 		gfx_v8_0_init_save_restore_list(adev);
4009 		gfx_v8_0_enable_save_restore_machine(adev);
4010 		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4011 		gfx_v8_0_init_power_gating(adev);
4012 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4013 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4014 			cz_enable_sck_slow_down_on_power_up(adev, true);
4015 			cz_enable_sck_slow_down_on_power_down(adev, true);
4016 		} else {
4017 			cz_enable_sck_slow_down_on_power_up(adev, false);
4018 			cz_enable_sck_slow_down_on_power_down(adev, false);
4019 		}
4020 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4021 			cz_enable_cp_power_gating(adev, true);
4022 		else
4023 			cz_enable_cp_power_gating(adev, false);
4024 	} else if (adev->asic_type == CHIP_POLARIS11) {
4025 		gfx_v8_0_init_csb(adev);
4026 		gfx_v8_0_init_save_restore_list(adev);
4027 		gfx_v8_0_enable_save_restore_machine(adev);
4028 		gfx_v8_0_init_power_gating(adev);
4029 	}
4030 
4031 }
4032 
4033 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4034 {
4035 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4036 
4037 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4038 	gfx_v8_0_wait_for_rlc_serdes(adev);
4039 }
4040 
4041 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4042 {
4043 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4044 	udelay(50);
4045 
4046 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4047 	udelay(50);
4048 }
4049 
4050 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4051 {
4052 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4053 
4054 	/* on APUs (carrizo), the cp interrupt is enabled later, after the cp is initialized */
4055 	if (!(adev->flags & AMD_IS_APU))
4056 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4057 
4058 	udelay(50);
4059 }
4060 
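/*
 * Legacy (non-SMU) RLC microcode load: reset RLC_GPM_UCODE_ADDR to 0,
 * stream the image one dword at a time through the auto-incrementing
 * UCODE_DATA port, then write the firmware version to UCODE_ADDR.
 */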
4061 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4062 {
4063 	const struct rlc_firmware_header_v2_0 *hdr;
4064 	const __le32 *fw_data;
4065 	unsigned i, fw_size;
4066 
4067 	if (!adev->gfx.rlc_fw)
4068 		return -EINVAL;
4069 
4070 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4071 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4072 
4073 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4074 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4075 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4076 
4077 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4078 	for (i = 0; i < fw_size; i++)
4079 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4080 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4081 
4082 	return 0;
4083 }
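/*
 * The RLC_GPM_UCODE_ADDR/DATA sequence above is the indexed-write
 * pattern used for all ucode uploads in this file: reset the address
 * register to 0, stream the words through the data register, then
 * write the firmware version into the address register, apparently as
 * an end-of-load marker for the RLC.
 */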
4084 
4085 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4086 {
4087 	int r;
4088 	u32 tmp;
4089 
4090 	gfx_v8_0_rlc_stop(adev);
4091 
4092 	/* disable CG */
4093 	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4094 	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4095 		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4096 	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4097 	if (adev->asic_type == CHIP_POLARIS11 ||
4098 	    adev->asic_type == CHIP_POLARIS10) {
4099 		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4100 		tmp &= ~0x3;
4101 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4102 	}
4103 
4104 	/* disable PG */
4105 	WREG32(mmRLC_PG_CNTL, 0);
4106 
4107 	gfx_v8_0_rlc_reset(adev);
4108 	gfx_v8_0_init_pg(adev);
4109 
4110 	if (!adev->pp_enabled) {
4111 		if (!adev->firmware.smu_load) {
4112 			/* legacy rlc firmware loading */
4113 			r = gfx_v8_0_rlc_load_microcode(adev);
4114 			if (r)
4115 				return r;
4116 		} else {
4117 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4118 							AMDGPU_UCODE_ID_RLC_G);
4119 			if (r)
4120 				return -EINVAL;
4121 		}
4122 	}
4123 
4124 	gfx_v8_0_rlc_start(adev);
4125 
4126 	return 0;
4127 }
4128 
4129 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4130 {
4131 	int i;
4132 	u32 tmp = RREG32(mmCP_ME_CNTL);
4133 
4134 	if (enable) {
4135 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4136 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4137 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4138 	} else {
4139 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4140 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4141 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4142 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4143 			adev->gfx.gfx_ring[i].ready = false;
4144 	}
4145 	WREG32(mmCP_ME_CNTL, tmp);
4146 	udelay(50);
4147 }
4148 
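/*
 * Load the PFP, CE and ME microcode through their UCODE/RAM register
 * pairs.  The CP front end is halted first via
 * gfx_v8_0_cp_gfx_enable(adev, false); each upload ends by writing the
 * firmware version to the address register, mirroring the RLC load
 * above.
 */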
4149 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4150 {
4151 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4152 	const struct gfx_firmware_header_v1_0 *ce_hdr;
4153 	const struct gfx_firmware_header_v1_0 *me_hdr;
4154 	const __le32 *fw_data;
4155 	unsigned i, fw_size;
4156 
4157 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4158 		return -EINVAL;
4159 
4160 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4161 		adev->gfx.pfp_fw->data;
4162 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4163 		adev->gfx.ce_fw->data;
4164 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4165 		adev->gfx.me_fw->data;
4166 
4167 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4168 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4169 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4170 
4171 	gfx_v8_0_cp_gfx_enable(adev, false);
4172 
4173 	/* PFP */
4174 	fw_data = (const __le32 *)
4175 		(adev->gfx.pfp_fw->data +
4176 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4177 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4178 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4179 	for (i = 0; i < fw_size; i++)
4180 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4181 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4182 
4183 	/* CE */
4184 	fw_data = (const __le32 *)
4185 		(adev->gfx.ce_fw->data +
4186 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4187 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4188 	WREG32(mmCP_CE_UCODE_ADDR, 0);
4189 	for (i = 0; i < fw_size; i++)
4190 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4191 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4192 
4193 	/* ME */
4194 	fw_data = (const __le32 *)
4195 		(adev->gfx.me_fw->data +
4196 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4197 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4198 	WREG32(mmCP_ME_RAM_WADDR, 0);
4199 	for (i = 0; i < fw_size; i++)
4200 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4201 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4202 
4203 	return 0;
4204 }
4205 
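/*
 * Size (in dwords) of the clear-state content emitted by
 * gfx_v8_0_cp_gfx_start(): 2 for PREAMBLE begin, 3 for CONTEXT_CONTROL,
 * 2 + reg_count per SECT_CONTEXT extent, 4 for the two raster-config
 * writes, 2 for PREAMBLE end and 2 for the CLEAR_STATE packet.
 */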
4206 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4207 {
4208 	u32 count = 0;
4209 	const struct cs_section_def *sect = NULL;
4210 	const struct cs_extent_def *ext = NULL;
4211 
4212 	/* begin clear state */
4213 	count += 2;
4214 	/* context control state */
4215 	count += 3;
4216 
4217 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4218 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4219 			if (sect->id == SECT_CONTEXT)
4220 				count += 2 + ext->reg_count;
4221 			else
4222 				return 0;
4223 		}
4224 	}
4225 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4226 	count += 4;
4227 	/* end clear state */
4228 	count += 2;
4229 	/* clear state */
4230 	count += 2;
4231 
4232 	return count;
4233 }
4234 
4235 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4236 {
4237 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4238 	const struct cs_section_def *sect = NULL;
4239 	const struct cs_extent_def *ext = NULL;
4240 	int r, i;
4241 
4242 	/* init the CP */
4243 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4244 	WREG32(mmCP_ENDIAN_SWAP, 0);
4245 	WREG32(mmCP_DEVICE_ID, 1);
4246 
4247 	gfx_v8_0_cp_gfx_enable(adev, true);
4248 
4249 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4250 	if (r) {
4251 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4252 		return r;
4253 	}
4254 
4255 	/* clear state buffer */
4256 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4257 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4258 
4259 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4260 	amdgpu_ring_write(ring, 0x80000000);
4261 	amdgpu_ring_write(ring, 0x80000000);
4262 
4263 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4264 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4265 			if (sect->id == SECT_CONTEXT) {
4266 				amdgpu_ring_write(ring,
4267 				       PACKET3(PACKET3_SET_CONTEXT_REG,
4268 					       ext->reg_count));
4269 				amdgpu_ring_write(ring,
4270 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4271 				for (i = 0; i < ext->reg_count; i++)
4272 					amdgpu_ring_write(ring, ext->extent[i]);
4273 			}
4274 		}
4275 	}
4276 
4277 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4278 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4279 	switch (adev->asic_type) {
4280 	case CHIP_TONGA:
4281 	case CHIP_POLARIS10:
4282 		amdgpu_ring_write(ring, 0x16000012);
4283 		amdgpu_ring_write(ring, 0x0000002A);
4284 		break;
4285 	case CHIP_POLARIS11:
4286 		amdgpu_ring_write(ring, 0x16000012);
4287 		amdgpu_ring_write(ring, 0x00000000);
4288 		break;
4289 	case CHIP_FIJI:
4290 		amdgpu_ring_write(ring, 0x3a00161a);
4291 		amdgpu_ring_write(ring, 0x0000002e);
4292 		break;
4293 	case CHIP_CARRIZO:
4294 		amdgpu_ring_write(ring, 0x00000002);
4295 		amdgpu_ring_write(ring, 0x00000000);
4296 		break;
4297 	case CHIP_TOPAZ:
4298 		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4299 				0x00000000 : 0x00000002);
4300 		amdgpu_ring_write(ring, 0x00000000);
4301 		break;
4302 	case CHIP_STONEY:
4303 		amdgpu_ring_write(ring, 0x00000000);
4304 		amdgpu_ring_write(ring, 0x00000000);
4305 		break;
4306 	default:
4307 		BUG();
4308 	}
4309 
4310 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4311 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4312 
4313 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4314 	amdgpu_ring_write(ring, 0);
4315 
4316 	/* init the CE partitions */
4317 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4318 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4319 	amdgpu_ring_write(ring, 0x8000);
4320 	amdgpu_ring_write(ring, 0x8000);
4321 
4322 	amdgpu_ring_commit(ring);
4323 
4324 	return 0;
4325 }
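/*
 * The PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG1 values in the switch
 * above are per-ASIC golden settings; the raw numbers presumably encode
 * each chip's RB/SE raster mapping and are not decoded here.
 */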
4326 
4327 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4328 {
4329 	struct amdgpu_ring *ring;
4330 	u32 tmp;
4331 	u32 rb_bufsz;
4332 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4333 	int r;
4334 
4335 	/* Set the write pointer delay */
4336 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4337 
4338 	/* set the RB to use vmid 0 */
4339 	WREG32(mmCP_RB_VMID, 0);
4340 
4341 	/* Set ring buffer size */
4342 	ring = &adev->gfx.gfx_ring[0];
4343 	rb_bufsz = order_base_2(ring->ring_size / 8);
4344 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4345 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4346 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4347 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4348 #ifdef __BIG_ENDIAN
4349 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4350 #endif
4351 	WREG32(mmCP_RB0_CNTL, tmp);
4352 
4353 	/* Initialize the ring buffer's read and write pointers */
4354 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4355 	ring->wptr = 0;
4356 	WREG32(mmCP_RB0_WPTR, ring->wptr);
4357 
4358 	/* set the wb address whether it's enabled or not */
4359 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4360 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4361 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4362 
4363 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4364 	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4365 	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4366 	mdelay(1);
4367 	WREG32(mmCP_RB0_CNTL, tmp);
4368 
4369 	rb_addr = ring->gpu_addr >> 8;
4370 	WREG32(mmCP_RB0_BASE, rb_addr);
4371 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4372 
4373 	/* no gfx doorbells on iceland */
4374 	if (adev->asic_type != CHIP_TOPAZ) {
4375 		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4376 		if (ring->use_doorbell) {
4377 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4378 					    DOORBELL_OFFSET, ring->doorbell_index);
4379 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4380 					    DOORBELL_HIT, 0);
4381 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4382 					    DOORBELL_EN, 1);
4383 		} else {
4384 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4385 					    DOORBELL_EN, 0);
4386 		}
4387 		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4388 
4389 		if (adev->asic_type == CHIP_TONGA) {
4390 			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4391 					    DOORBELL_RANGE_LOWER,
4392 					    AMDGPU_DOORBELL_GFX_RING0);
4393 			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4394 
4395 			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4396 			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4397 		}
4398 
4399 	}
4400 
4401 	/* start the ring */
4402 	gfx_v8_0_cp_gfx_start(adev);
4403 	ring->ready = true;
4404 	r = amdgpu_ring_test_ring(ring);
4405 	if (r)
4406 		ring->ready = false;
4407 
4408 	return r;
4409 }
4410 
4411 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4412 {
4413 	int i;
4414 
4415 	if (enable) {
4416 		WREG32(mmCP_MEC_CNTL, 0);
4417 	} else {
4418 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4419 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4420 			adev->gfx.compute_ring[i].ready = false;
4421 	}
4422 	udelay(50);
4423 }
4424 
4425 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4426 {
4427 	const struct gfx_firmware_header_v1_0 *mec_hdr;
4428 	const __le32 *fw_data;
4429 	unsigned i, fw_size;
4430 
4431 	if (!adev->gfx.mec_fw)
4432 		return -EINVAL;
4433 
4434 	gfx_v8_0_cp_compute_enable(adev, false);
4435 
4436 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4437 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4438 
4439 	fw_data = (const __le32 *)
4440 		(adev->gfx.mec_fw->data +
4441 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4442 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4443 
4444 	/* MEC1 */
4445 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4446 	for (i = 0; i < fw_size; i++)
4447 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4448 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4449 
4450 	/* MEC2 firmware is only needed when MEC2 should run different microcode than MEC1 */
4451 	if (adev->gfx.mec2_fw) {
4452 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4453 
4454 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4455 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4456 
4457 		fw_data = (const __le32 *)
4458 			(adev->gfx.mec2_fw->data +
4459 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4460 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4461 
4462 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4463 		for (i = 0; i < fw_size; i++)
4464 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4465 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4466 	}
4467 
4468 	return 0;
4469 }
4470 
4471 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4472 {
4473 	int i, r;
4474 
4475 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4476 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4477 
4478 		if (ring->mqd_obj) {
4479 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4480 			if (unlikely(r != 0))
4481 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4482 
4483 			amdgpu_bo_unpin(ring->mqd_obj);
4484 			amdgpu_bo_unreserve(ring->mqd_obj);
4485 
4486 			amdgpu_bo_unref(&ring->mqd_obj);
4487 			ring->mqd_obj = NULL;
4488 		}
4489 	}
4490 }
4491 
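/*
 * Create, pin and map one Memory Queue Descriptor (MQD) per compute
 * ring, then mirror it into the hardware queue descriptor (HQD)
 * registers.  All HQD accesses are made with the ring's me/pipe/queue
 * selected through vi_srbm_select(), under srbm_mutex.
 */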
4492 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4493 {
4494 	int r, i, j;
4495 	u32 tmp;
4496 	bool use_doorbell = true;
4497 	u64 hqd_gpu_addr;
4498 	u64 mqd_gpu_addr;
4499 	u64 eop_gpu_addr;
4500 	u64 wb_gpu_addr;
4501 	u32 *buf;
4502 	struct vi_mqd *mqd;
4503 
4504 	/* init the queues */
4505 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4506 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4507 
4508 		if (ring->mqd_obj == NULL) {
4509 			r = amdgpu_bo_create(adev,
4510 					     sizeof(struct vi_mqd),
4511 					     PAGE_SIZE, true,
4512 					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4513 					     NULL, &ring->mqd_obj);
4514 			if (r) {
4515 				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4516 				return r;
4517 			}
4518 		}
4519 
4520 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4521 		if (unlikely(r != 0)) {
4522 			gfx_v8_0_cp_compute_fini(adev);
4523 			return r;
4524 		}
4525 		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4526 				  &mqd_gpu_addr);
4527 		if (r) {
4528 			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4529 			gfx_v8_0_cp_compute_fini(adev);
4530 			return r;
4531 		}
4532 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4533 		if (r) {
4534 			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4535 			gfx_v8_0_cp_compute_fini(adev);
4536 			return r;
4537 		}
4538 
4539 		/* init the mqd struct */
4540 		memset(buf, 0, sizeof(struct vi_mqd));
4541 
4542 		mqd = (struct vi_mqd *)buf;
4543 		mqd->header = 0xC0310800;
4544 		mqd->compute_pipelinestat_enable = 0x00000001;
4545 		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4546 		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4547 		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4548 		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4549 		mqd->compute_misc_reserved = 0x00000003;
4550 
4551 		mutex_lock(&adev->srbm_mutex);
4552 		vi_srbm_select(adev, ring->me,
4553 			       ring->pipe,
4554 			       ring->queue, 0);
4555 
4556 		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4557 		eop_gpu_addr >>= 8;
4558 
4559 		/* write the EOP addr */
4560 		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4561 		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4562 
4563 		/* set the VMID assigned */
4564 		WREG32(mmCP_HQD_VMID, 0);
4565 
4566 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4567 		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4568 		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4569 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
4570 		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4571 
4572 		/* disable wptr polling */
4573 		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4574 		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4575 		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4576 
4577 		mqd->cp_hqd_eop_base_addr_lo =
4578 			RREG32(mmCP_HQD_EOP_BASE_ADDR);
4579 		mqd->cp_hqd_eop_base_addr_hi =
4580 			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4581 
4582 		/* enable doorbell? */
4583 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4584 		if (use_doorbell) {
4585 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4586 		} else {
4587 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4588 		}
4589 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4590 		mqd->cp_hqd_pq_doorbell_control = tmp;
4591 
4592 		/* disable the queue if it's active */
4593 		mqd->cp_hqd_dequeue_request = 0;
4594 		mqd->cp_hqd_pq_rptr = 0;
4595 		mqd->cp_hqd_pq_wptr = 0;
4596 		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4597 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4598 			for (j = 0; j < adev->usec_timeout; j++) {
4599 				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4600 					break;
4601 				udelay(1);
4602 			}
4603 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4604 			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4605 			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4606 		}
4607 
4608 		/* set the pointer to the MQD */
4609 		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4610 		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4611 		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4612 		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4613 
4614 		/* set MQD vmid to 0 */
4615 		tmp = RREG32(mmCP_MQD_CONTROL);
4616 		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4617 		WREG32(mmCP_MQD_CONTROL, tmp);
4618 		mqd->cp_mqd_control = tmp;
4619 
4620 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4621 		hqd_gpu_addr = ring->gpu_addr >> 8;
4622 		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4623 		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4624 		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4625 		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4626 
4627 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4628 		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4629 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4630 				    (order_base_2(ring->ring_size / 4) - 1));
4631 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4632 			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4633 #ifdef __BIG_ENDIAN
4634 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4635 #endif
4636 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4637 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4638 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4639 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4640 		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4641 		mqd->cp_hqd_pq_control = tmp;
4642 
4643 		/* set the wb address whether it's enabled or not */
4644 		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4645 		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4646 		mqd->cp_hqd_pq_rptr_report_addr_hi =
4647 			upper_32_bits(wb_gpu_addr) & 0xffff;
4648 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4649 		       mqd->cp_hqd_pq_rptr_report_addr_lo);
4650 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4651 		       mqd->cp_hqd_pq_rptr_report_addr_hi);
4652 
4653 		/* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4654 		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4655 		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4656 		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4657 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4658 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4659 		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4660 
4661 		/* enable the doorbell if requested */
4662 		if (use_doorbell) {
4663 			if ((adev->asic_type == CHIP_CARRIZO) ||
4664 			    (adev->asic_type == CHIP_FIJI) ||
4665 			    (adev->asic_type == CHIP_STONEY) ||
4666 			    (adev->asic_type == CHIP_POLARIS11) ||
4667 			    (adev->asic_type == CHIP_POLARIS10)) {
4668 				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4669 				       AMDGPU_DOORBELL_KIQ << 2);
4670 				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4671 				       AMDGPU_DOORBELL_MEC_RING7 << 2);
4672 			}
4673 			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4674 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4675 					    DOORBELL_OFFSET, ring->doorbell_index);
4676 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4677 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4678 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4679 			mqd->cp_hqd_pq_doorbell_control = tmp;
4680 
4681 		} else {
4682 			mqd->cp_hqd_pq_doorbell_control = 0;
4683 		}
4684 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4685 		       mqd->cp_hqd_pq_doorbell_control);
4686 
4687 		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4688 		ring->wptr = 0;
4689 		mqd->cp_hqd_pq_wptr = ring->wptr;
4690 		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4691 		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4692 
4693 		/* set the vmid for the queue */
4694 		mqd->cp_hqd_vmid = 0;
4695 		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4696 
4697 		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4698 		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4699 		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4700 		mqd->cp_hqd_persistent_state = tmp;
4701 		if (adev->asic_type == CHIP_STONEY ||
4702 			adev->asic_type == CHIP_POLARIS11 ||
4703 			adev->asic_type == CHIP_POLARIS10) {
4704 			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4705 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4706 			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4707 		}
4708 
4709 		/* activate the queue */
4710 		mqd->cp_hqd_active = 1;
4711 		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4712 
4713 		vi_srbm_select(adev, 0, 0, 0, 0);
4714 		mutex_unlock(&adev->srbm_mutex);
4715 
4716 		amdgpu_bo_kunmap(ring->mqd_obj);
4717 		amdgpu_bo_unreserve(ring->mqd_obj);
4718 	}
4719 
4720 	if (use_doorbell) {
4721 		tmp = RREG32(mmCP_PQ_STATUS);
4722 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4723 		WREG32(mmCP_PQ_STATUS, tmp);
4724 	}
4725 
4726 	gfx_v8_0_cp_compute_enable(adev, true);
4727 
4728 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4729 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4730 
4731 		ring->ready = true;
4732 		r = amdgpu_ring_test_ring(ring);
4733 		if (r)
4734 			ring->ready = false;
4735 	}
4736 
4737 	return 0;
4738 }
4739 
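/*
 * Three firmware paths converge here: powerplay (pp_enabled, firmware
 * already handled), SMU-assisted loading (only poll
 * check_fw_load_finish() per ucode id), and legacy loading where the
 * driver writes the microcode itself.  As the CHIP_TOPAZ special case
 * below shows, Topaz takes the legacy path for MEC even with the SMU.
 */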
4740 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4741 {
4742 	int r;
4743 
4744 	if (!(adev->flags & AMD_IS_APU))
4745 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4746 
4747 	if (!adev->pp_enabled) {
4748 		if (!adev->firmware.smu_load) {
4749 			/* legacy firmware loading */
4750 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
4751 			if (r)
4752 				return r;
4753 
4754 			r = gfx_v8_0_cp_compute_load_microcode(adev);
4755 			if (r)
4756 				return r;
4757 		} else {
4758 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4759 							AMDGPU_UCODE_ID_CP_CE);
4760 			if (r)
4761 				return -EINVAL;
4762 
4763 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4764 							AMDGPU_UCODE_ID_CP_PFP);
4765 			if (r)
4766 				return -EINVAL;
4767 
4768 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4769 							AMDGPU_UCODE_ID_CP_ME);
4770 			if (r)
4771 				return -EINVAL;
4772 
4773 			if (adev->asic_type == CHIP_TOPAZ) {
4774 				r = gfx_v8_0_cp_compute_load_microcode(adev);
4775 				if (r)
4776 					return r;
4777 			} else {
4778 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4779 										 AMDGPU_UCODE_ID_CP_MEC1);
4780 				if (r)
4781 					return -EINVAL;
4782 			}
4783 		}
4784 	}
4785 
4786 	r = gfx_v8_0_cp_gfx_resume(adev);
4787 	if (r)
4788 		return r;
4789 
4790 	r = gfx_v8_0_cp_compute_resume(adev);
4791 	if (r)
4792 		return r;
4793 
4794 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4795 
4796 	return 0;
4797 }
4798 
4799 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4800 {
4801 	gfx_v8_0_cp_gfx_enable(adev, enable);
4802 	gfx_v8_0_cp_compute_enable(adev, enable);
4803 }
4804 
4805 static int gfx_v8_0_hw_init(void *handle)
4806 {
4807 	int r;
4808 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4809 
4810 	gfx_v8_0_init_golden_registers(adev);
4811 	gfx_v8_0_gpu_init(adev);
4812 
4813 	r = gfx_v8_0_rlc_resume(adev);
4814 	if (r)
4815 		return r;
4816 
4817 	r = gfx_v8_0_cp_resume(adev);
4818 
4819 	return r;
4820 }
4821 
4822 static int gfx_v8_0_hw_fini(void *handle)
4823 {
4824 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4825 
4826 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4827 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4828 	if (amdgpu_sriov_vf(adev)) {
4829 		pr_debug("Skipping CP/RLC teardown for SRIOV client\n");
4830 		return 0;
4831 	}
4832 	gfx_v8_0_cp_enable(adev, false);
4833 	gfx_v8_0_rlc_stop(adev);
4834 	gfx_v8_0_cp_compute_fini(adev);
4835 
4836 	amdgpu_set_powergating_state(adev,
4837 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4838 
4839 	return 0;
4840 }
4841 
4842 static int gfx_v8_0_suspend(void *handle)
4843 {
4844 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4845 
4846 	return gfx_v8_0_hw_fini(adev);
4847 }
4848 
4849 static int gfx_v8_0_resume(void *handle)
4850 {
4851 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4852 
4853 	return gfx_v8_0_hw_init(adev);
4854 }
4855 
4856 static bool gfx_v8_0_is_idle(void *handle)
4857 {
4858 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4859 
4860 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4861 		return false;
4862 	else
4863 		return true;
4864 }
4865 
4866 static int gfx_v8_0_wait_for_idle(void *handle)
4867 {
4868 	unsigned i;
4869 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4870 
4871 	for (i = 0; i < adev->usec_timeout; i++) {
4872 		if (gfx_v8_0_is_idle(handle))
4873 			return 0;
4874 
4875 		udelay(1);
4876 	}
4877 	return -ETIMEDOUT;
4878 }
4879 
4880 static bool gfx_v8_0_check_soft_reset(void *handle)
4881 {
4882 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4883 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4884 	u32 tmp;
4885 
4886 	/* GRBM_STATUS */
4887 	tmp = RREG32(mmGRBM_STATUS);
4888 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4889 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4890 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4891 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4892 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4893 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4894 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4895 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4896 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4897 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4898 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4899 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4900 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4901 	}
4902 
4903 	/* GRBM_STATUS2 */
4904 	tmp = RREG32(mmGRBM_STATUS2);
4905 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4906 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4907 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4908 
4909 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4910 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4911 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4912 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4913 						SOFT_RESET_CPF, 1);
4914 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4915 						SOFT_RESET_CPC, 1);
4916 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4917 						SOFT_RESET_CPG, 1);
4918 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4919 						SOFT_RESET_GRBM, 1);
4920 	}
4921 
4922 	/* SRBM_STATUS */
4923 	tmp = RREG32(mmSRBM_STATUS);
4924 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4925 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4926 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4927 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4928 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4929 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4930 
4931 	if (grbm_soft_reset || srbm_soft_reset) {
4932 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
4933 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
4934 		return true;
4935 	} else {
4936 		adev->gfx.grbm_soft_reset = 0;
4937 		adev->gfx.srbm_soft_reset = 0;
4938 		return false;
4939 	}
4940 }
4941 
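/*
 * Drain the selected HQD before soft reset: DEQUEUE_REQ = 2 looks like
 * a drain-type dequeue request, after which we busy-wait for ACTIVE to
 * clear.  Unlike the resume path this runs without srbm_mutex,
 * presumably safe because reset handling is single-threaded.
 */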
4942 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
4943 				  struct amdgpu_ring *ring)
4944 {
4945 	int i;
4946 
4947 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4948 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4949 		u32 tmp;
4950 		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
4951 		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
4952 				    DEQUEUE_REQ, 2);
4953 		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
4954 		for (i = 0; i < adev->usec_timeout; i++) {
4955 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4956 				break;
4957 			udelay(1);
4958 		}
4959 	}
4960 }
4961 
4962 static int gfx_v8_0_pre_soft_reset(void *handle)
4963 {
4964 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4965 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4966 
4967 	if ((!adev->gfx.grbm_soft_reset) &&
4968 	    (!adev->gfx.srbm_soft_reset))
4969 		return 0;
4970 
4971 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
4972 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
4973 
4974 	/* stop the rlc */
4975 	gfx_v8_0_rlc_stop(adev);
4976 
4977 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4978 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4979 		/* Disable GFX parsing/prefetching */
4980 		gfx_v8_0_cp_gfx_enable(adev, false);
4981 
4982 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4983 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4984 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4985 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4986 		int i;
4987 
4988 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4989 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4990 
4991 			gfx_v8_0_inactive_hqd(adev, ring);
4992 		}
4993 		/* Disable MEC parsing/prefetching */
4994 		gfx_v8_0_cp_compute_enable(adev, false);
4995 	}
4996 
4997 	return 0;
4998 }
4999 
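/*
 * Pulse the reset bits computed by gfx_v8_0_check_soft_reset().  GMCON
 * is told to stall and clear GFX traffic around the pulse, presumably
 * so in-flight memory requests are not corrupted; the readback after
 * each WREG32 posts the write before the delay.
 */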
5000 static int gfx_v8_0_soft_reset(void *handle)
5001 {
5002 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5003 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5004 	u32 tmp;
5005 
5006 	if ((!adev->gfx.grbm_soft_reset) &&
5007 	    (!adev->gfx.srbm_soft_reset))
5008 		return 0;
5009 
5010 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5011 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5012 
5013 	if (grbm_soft_reset || srbm_soft_reset) {
5014 		tmp = RREG32(mmGMCON_DEBUG);
5015 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5016 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5017 		WREG32(mmGMCON_DEBUG, tmp);
5018 		udelay(50);
5019 	}
5020 
5021 	if (grbm_soft_reset) {
5022 		tmp = RREG32(mmGRBM_SOFT_RESET);
5023 		tmp |= grbm_soft_reset;
5024 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5025 		WREG32(mmGRBM_SOFT_RESET, tmp);
5026 		tmp = RREG32(mmGRBM_SOFT_RESET);
5027 
5028 		udelay(50);
5029 
5030 		tmp &= ~grbm_soft_reset;
5031 		WREG32(mmGRBM_SOFT_RESET, tmp);
5032 		tmp = RREG32(mmGRBM_SOFT_RESET);
5033 	}
5034 
5035 	if (srbm_soft_reset) {
5036 		tmp = RREG32(mmSRBM_SOFT_RESET);
5037 		tmp |= srbm_soft_reset;
5038 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5039 		WREG32(mmSRBM_SOFT_RESET, tmp);
5040 		tmp = RREG32(mmSRBM_SOFT_RESET);
5041 
5042 		udelay(50);
5043 
5044 		tmp &= ~srbm_soft_reset;
5045 		WREG32(mmSRBM_SOFT_RESET, tmp);
5046 		tmp = RREG32(mmSRBM_SOFT_RESET);
5047 	}
5048 
5049 	if (grbm_soft_reset || srbm_soft_reset) {
5050 		tmp = RREG32(mmGMCON_DEBUG);
5051 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5052 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5053 		WREG32(mmGMCON_DEBUG, tmp);
5054 	}
5055 
5056 	/* Wait a little for things to settle down */
5057 	udelay(50);
5058 
5059 	return 0;
5060 }
5061 
5062 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5063 			      struct amdgpu_ring *ring)
5064 {
5065 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5066 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5067 	WREG32(mmCP_HQD_PQ_RPTR, 0);
5068 	WREG32(mmCP_HQD_PQ_WPTR, 0);
5069 	vi_srbm_select(adev, 0, 0, 0, 0);
5070 }
5071 
5072 static int gfx_v8_0_post_soft_reset(void *handle)
5073 {
5074 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5075 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5076 
5077 	if ((!adev->gfx.grbm_soft_reset) &&
5078 	    (!adev->gfx.srbm_soft_reset))
5079 		return 0;
5080 
5081 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5082 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5083 
5084 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5085 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5086 		gfx_v8_0_cp_gfx_resume(adev);
5087 
5088 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5089 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5090 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5091 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5092 		int i;
5093 
5094 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5095 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5096 
5097 			gfx_v8_0_init_hqd(adev, ring);
5098 		}
5099 		gfx_v8_0_cp_compute_resume(adev);
5100 	}
5101 	gfx_v8_0_rlc_start(adev);
5102 
5103 	return 0;
5104 }
5105 
5106 /**
5107  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5108  *
5109  * @adev: amdgpu_device pointer
5110  *
5111  * Fetches a GPU clock counter snapshot.
5112  * Returns the 64 bit clock counter snapshot.
5113  */
5114 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5115 {
5116 	uint64_t clock;
5117 
5118 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5119 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5120 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5121 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5122 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5123 	return clock;
5124 }
5125 
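/*
 * Emit WRITE_DATA packets that point the per-VMID GDS, GWS and OA
 * apertures at this job's allocation.  The shifts convert byte-based
 * sizes into the hardware's native granularity, and the OA write turns
 * (base, size) into a contiguous bitmask of OA units.
 */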
5126 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5127 					  uint32_t vmid,
5128 					  uint32_t gds_base, uint32_t gds_size,
5129 					  uint32_t gws_base, uint32_t gws_size,
5130 					  uint32_t oa_base, uint32_t oa_size)
5131 {
5132 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5133 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5134 
5135 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5136 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5137 
5138 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5139 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5140 
5141 	/* GDS Base */
5142 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5143 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5144 				WRITE_DATA_DST_SEL(0)));
5145 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5146 	amdgpu_ring_write(ring, 0);
5147 	amdgpu_ring_write(ring, gds_base);
5148 
5149 	/* GDS Size */
5150 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5151 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5152 				WRITE_DATA_DST_SEL(0)));
5153 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5154 	amdgpu_ring_write(ring, 0);
5155 	amdgpu_ring_write(ring, gds_size);
5156 
5157 	/* GWS */
5158 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5159 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5160 				WRITE_DATA_DST_SEL(0)));
5161 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5162 	amdgpu_ring_write(ring, 0);
5163 	amdgpu_ring_write(ring, (gws_size << GDS_GWS_VMID0__SIZE__SHIFT) | gws_base);
5164 
5165 	/* OA */
5166 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5167 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5168 				WRITE_DATA_DST_SEL(0)));
5169 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5170 	amdgpu_ring_write(ring, 0);
5171 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5172 }
5173 
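/*
 * Debugfs wave introspection: SQ_IND_INDEX/SQ_IND_DATA form an indexed
 * window into per-wave SQ state.  FORCE_READ appears to bypass the
 * wave's own access checks, and AUTO_INCR lets wave_read_regs() stream
 * consecutive registers for SGPR dumps.
 */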
5174 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5175 {
5176 	WREG32(mmSQ_IND_INDEX,
5177 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5178 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5179 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5180 		(SQ_IND_INDEX__FORCE_READ_MASK));
5181 	return RREG32(mmSQ_IND_DATA);
5182 }
5183 
5184 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5185 			   uint32_t wave, uint32_t thread,
5186 			   uint32_t regno, uint32_t num, uint32_t *out)
5187 {
5188 	WREG32(mmSQ_IND_INDEX,
5189 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5190 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5191 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5192 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5193 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5194 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5195 	while (num--)
5196 		*(out++) = RREG32(mmSQ_IND_DATA);
5197 }
5198 
5199 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5200 {
5201 	/* type 0 wave data */
5202 	dst[(*no_fields)++] = 0;
5203 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5204 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5205 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5206 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5207 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5208 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5209 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5210 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5211 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5212 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5213 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5214 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5215 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5216 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5217 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5218 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5219 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5220 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5221 }
5222 
5223 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5224 				     uint32_t wave, uint32_t start,
5225 				     uint32_t size, uint32_t *dst)
5226 {
5227 	wave_read_regs(
5228 		adev, simd, wave, 0,
5229 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5230 }
5231 
5232 
5233 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5234 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5235 	.select_se_sh = &gfx_v8_0_select_se_sh,
5236 	.read_wave_data = &gfx_v8_0_read_wave_data,
5237 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5238 };
5239 
5240 static int gfx_v8_0_early_init(void *handle)
5241 {
5242 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5243 
5244 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5245 	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5246 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5247 	gfx_v8_0_set_ring_funcs(adev);
5248 	gfx_v8_0_set_irq_funcs(adev);
5249 	gfx_v8_0_set_gds_init(adev);
5250 	gfx_v8_0_set_rlc_funcs(adev);
5251 
5252 	return 0;
5253 }
5254 
5255 static int gfx_v8_0_late_init(void *handle)
5256 {
5257 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5258 	int r;
5259 
5260 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5261 	if (r)
5262 		return r;
5263 
5264 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5265 	if (r)
5266 		return r;
5267 
5268 	/* requires IBs so do in late init after IB pool is initialized */
5269 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5270 	if (r)
5271 		return r;
5272 
5273 	amdgpu_set_powergating_state(adev,
5274 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5275 
5276 	return 0;
5277 }
5278 
5279 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5280 						       bool enable)
5281 {
5282 	if (adev->asic_type == CHIP_POLARIS11)
5283 		/* Send msg to SMU via Powerplay */
5284 		amdgpu_set_powergating_state(adev,
5285 					     AMD_IP_BLOCK_TYPE_SMC,
5286 					     enable ?
5287 					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5288 
5289 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5290 }
5291 
5292 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5293 							bool enable)
5294 {
5295 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5296 }
5297 
5298 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5299 		bool enable)
5300 {
5301 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5302 }
5303 
5304 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5305 					  bool enable)
5306 {
5307 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5308 }
5309 
5310 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5311 						bool enable)
5312 {
5313 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5314 
5315 	/* Read any GFX register to wake up GFX. */
5316 	if (!enable)
5317 		RREG32(mmDB_RENDER_CONTROL);
5318 }
5319 
5320 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5321 					  bool enable)
5322 {
5323 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5324 		cz_enable_gfx_cg_power_gating(adev, true);
5325 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5326 			cz_enable_gfx_pipeline_power_gating(adev, true);
5327 	} else {
5328 		cz_enable_gfx_cg_power_gating(adev, false);
5329 		cz_enable_gfx_pipeline_power_gating(adev, false);
5330 	}
5331 }
5332 
5333 static int gfx_v8_0_set_powergating_state(void *handle,
5334 					  enum amd_powergating_state state)
5335 {
5336 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5337 	bool enable = (state == AMD_PG_STATE_GATE);
5338 
5339 	switch (adev->asic_type) {
5340 	case CHIP_CARRIZO:
5341 	case CHIP_STONEY:
5342 
5343 		cz_update_gfx_cg_power_gating(adev, enable);
5344 
5345 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5346 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5347 		else
5348 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5349 
5350 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5351 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5352 		else
5353 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5354 		break;
5355 	case CHIP_POLARIS11:
5356 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5357 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5358 		else
5359 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5360 
5361 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5362 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5363 		else
5364 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5365 
5366 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5367 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5368 		else
5369 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5370 		break;
5371 	default:
5372 		break;
5373 	}
5374 
5375 	return 0;
5376 }
5377 
5378 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5379 				     uint32_t reg_addr, uint32_t cmd)
5380 {
5381 	uint32_t data;
5382 
5383 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5384 
5385 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5386 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5387 
5388 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5389 	if (adev->asic_type == CHIP_STONEY)
5390 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5391 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5392 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5393 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5394 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5395 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5396 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5397 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5398 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5399 	else
5400 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5401 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5402 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5403 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5404 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5405 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5406 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5407 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5408 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5409 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5410 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5411 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5412 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5413 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5414 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5415 
5416 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5417 }
5418 
5419 #define MSG_ENTER_RLC_SAFE_MODE     1
5420 #define MSG_EXIT_RLC_SAFE_MODE      0
5421 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5422 #define RLC_GPR_REG2__REQ__SHIFT 0
5423 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5424 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5425 
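/*
 * RLC safe-mode handshake, CZ/ST flavour: set REQ plus the message id
 * in RLC_GPR_REG2, wait for RLC_GPM_STAT to report GFX clocks and power
 * both on, then wait for the RLC to clear REQ as the acknowledgement.
 * The iceland variant below speaks the same protocol through
 * RLC_SAFE_MODE instead.
 */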
5426 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5427 {
5428 	u32 data = 0;
5429 	unsigned i;
5430 
5431 	data = RREG32(mmRLC_CNTL);
5432 	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5433 		return;
5434 
5435 	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5436 	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5437 			       AMD_PG_SUPPORT_GFX_DMG))) {
5438 		data |= RLC_GPR_REG2__REQ_MASK;
5439 		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5440 		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5441 		WREG32(mmRLC_GPR_REG2, data);
5442 
5443 		for (i = 0; i < adev->usec_timeout; i++) {
5444 			if ((RREG32(mmRLC_GPM_STAT) &
5445 			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5446 			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5447 			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5448 			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5449 				break;
5450 			udelay(1);
5451 		}
5452 
5453 		for (i = 0; i < adev->usec_timeout; i++) {
5454 			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5455 				break;
5456 			udelay(1);
5457 		}
5458 		adev->gfx.rlc.in_safe_mode = true;
5459 	}
5460 }
5461 
5462 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5463 {
5464 	u32 data;
5465 	unsigned i;
5466 
5467 	data = RREG32(mmRLC_CNTL);
5468 	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5469 		return;
5470 
5471 	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5472 	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5473 			       AMD_PG_SUPPORT_GFX_DMG))) {
5474 		data |= RLC_GPR_REG2__REQ_MASK;
5475 		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5476 		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5477 		WREG32(mmRLC_GPR_REG2, data);
5478 		adev->gfx.rlc.in_safe_mode = false;
5479 	}
5480 
5481 	for (i = 0; i < adev->usec_timeout; i++) {
5482 		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5483 			break;
5484 		udelay(1);
5485 	}
5486 }
5487 
5488 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5489 {
5490 	u32 data;
5491 	unsigned i;
5492 
5493 	data = RREG32(mmRLC_CNTL);
5494 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5495 		return;
5496 
5497 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5498 		data |= RLC_SAFE_MODE__CMD_MASK;
5499 		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5500 		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5501 		WREG32(mmRLC_SAFE_MODE, data);
5502 
5503 		for (i = 0; i < adev->usec_timeout; i++) {
5504 			if ((RREG32(mmRLC_GPM_STAT) &
5505 			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5506 			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5507 			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5508 			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5509 				break;
5510 			udelay(1);
5511 		}
5512 
5513 		for (i = 0; i < adev->usec_timeout; i++) {
5514 			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5515 				break;
5516 			udelay(1);
5517 		}
5518 		adev->gfx.rlc.in_safe_mode = true;
5519 	}
5520 }
5521 
5522 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5523 {
5524 	u32 data = 0;
5525 	unsigned i;
5526 
5527 	data = RREG32(mmRLC_CNTL);
5528 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5529 		return;
5530 
5531 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5532 		if (adev->gfx.rlc.in_safe_mode) {
5533 			data |= RLC_SAFE_MODE__CMD_MASK;
5534 			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5535 			WREG32(mmRLC_SAFE_MODE, data);
5536 			adev->gfx.rlc.in_safe_mode = false;
5537 		}
5538 	}
5539 
5540 	for (i = 0; i < adev->usec_timeout; i++) {
5541 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5542 			break;
5543 		udelay(1);
5544 	}
5545 }
5546 
5547 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5548 {
5549 	adev->gfx.rlc.in_safe_mode = true;
5550 }
5551 
5552 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5553 {
5554 	adev->gfx.rlc.in_safe_mode = false;
5555 }
5556 
5557 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5558 	.enter_safe_mode = cz_enter_rlc_safe_mode,
5559 	.exit_safe_mode = cz_exit_rlc_safe_mode
5560 };
5561 
5562 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5563 	.enter_safe_mode = iceland_enter_rlc_safe_mode,
5564 	.exit_safe_mode = iceland_exit_rlc_safe_mode
5565 };
5566 
5567 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5568 	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5569 	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5570 };
5571 
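/*
 * The MGCG/CGCG updates follow the numbered sequences in the comments
 * below.  Every step that changes BPM state is broadcast to all CUs
 * through gfx_v8_0_send_serdes_cmd() and is therefore bracketed by
 * gfx_v8_0_wait_for_rlc_serdes(); the whole sequence runs in RLC safe
 * mode.
 */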
5572 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5573 						      bool enable)
5574 {
5575 	uint32_t temp, data;
5576 
5577 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5578 
5579 	/* MGCG is disabled by hardware by default */
5580 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5581 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5582 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5583 				/* 1 - RLC memory Light sleep */
5584 				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5585 
5586 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5587 				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5588 		}
5589 
5590 		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5591 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5592 		if (adev->flags & AMD_IS_APU)
5593 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5594 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5595 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5596 		else
5597 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5598 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5599 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5600 				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5601 
5602 		if (temp != data)
5603 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5604 
5605 		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5606 		gfx_v8_0_wait_for_rlc_serdes(adev);
5607 
5608 		/* 5 - clear mgcg override */
5609 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5610 
5611 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5612 			/* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
5613 			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5614 			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5615 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5616 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5617 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5618 			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5619 			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5620 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5621 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5622 			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5623 			if (temp != data)
5624 				WREG32(mmCGTS_SM_CTRL_REG, data);
5625 		}
5626 		udelay(50);
5627 
5628 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5629 		gfx_v8_0_wait_for_rlc_serdes(adev);
5630 	} else {
5631 		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5632 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5633 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5634 				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5635 				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5636 				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5637 		if (temp != data)
5638 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5639 
5640 		/* 2 - disable MGLS in RLC */
5641 		data = RREG32(mmRLC_MEM_SLP_CNTL);
5642 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5643 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5644 			WREG32(mmRLC_MEM_SLP_CNTL, data);
5645 		}
5646 
5647 		/* 3 - disable MGLS in CP */
5648 		data = RREG32(mmCP_MEM_SLP_CNTL);
5649 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5650 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5651 			WREG32(mmCP_MEM_SLP_CNTL, data);
5652 		}
5653 
5654 		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5655 		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5656 		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5657 				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5658 		if (temp != data)
5659 			WREG32(mmCGTS_SM_CTRL_REG, data);
5660 
5661 		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5662 		gfx_v8_0_wait_for_rlc_serdes(adev);
5663 
5664 		/* 6 - set mgcg override */
5665 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5666 
5667 		udelay(50);
5668 
5669 		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5670 		gfx_v8_0_wait_for_rlc_serdes(adev);
5671 	}
5672 
5673 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5674 }
5675 
5676 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5677 						      bool enable)
5678 {
5679 	uint32_t temp, temp1, data, data1;
5680 
5681 	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5682 
5683 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5684 
5685 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5686 		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5687 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5688 		if (temp1 != data1)
5689 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5690 
5691 		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5692 		gfx_v8_0_wait_for_rlc_serdes(adev);
5693 
5694 		/* 2 - clear cgcg override */
5695 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5696 
5697 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5698 		gfx_v8_0_wait_for_rlc_serdes(adev);
5699 
5700 		/* 3 - write cmd to set CGLS */
5701 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5702 
5703 		/* 4 - enable cgcg */
5704 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5705 
5706 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5707 			/* enable cgls*/
5708 			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5709 
5710 			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5711 			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5712 
5713 			if (temp1 != data1)
5714 				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5715 		} else {
5716 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5717 		}
5718 
5719 		if (temp != data)
5720 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5721 
5722 		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5723 		 * Cmp_busy/GFX_Idle interrupts
5724 		 */
5725 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5726 	} else {
5727 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5728 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5729 
5730 		/* set CGCG and CGLS override */
5731 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5732 		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5733 				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5734 		if (temp1 != data1)
5735 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5736 
5737 		/* read gfx register to wake up cgcg */
5738 		RREG32(mmCB_CGTT_SCLK_CTRL);
5739 		RREG32(mmCB_CGTT_SCLK_CTRL);
5740 		RREG32(mmCB_CGTT_SCLK_CTRL);
5741 		RREG32(mmCB_CGTT_SCLK_CTRL);
5742 
5743 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5744 		gfx_v8_0_wait_for_rlc_serdes(adev);
5745 
5746 		/* write cmd to set CGCG override */
5747 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5748 
5749 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5750 		gfx_v8_0_wait_for_rlc_serdes(adev);
5751 
5752 		/* write cmd to Clear CGLS */
5753 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5754 
5755 		/* disable cgcg, cgls should be disabled too. */
5756 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5757 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5758 		if (temp != data)
5759 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5760 	}
5761 
5762 	gfx_v8_0_wait_for_rlc_serdes(adev);
5763 
5764 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5765 }

5766 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5767 					    bool enable)
5768 {
5769 	if (enable) {
5770 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5771 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5772 		 */
5773 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5774 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5775 	} else {
5776 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5777 		 * ===  CGCG + CGLS ===
5778 		 */
5779 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5780 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5781 	}
5782 	return 0;
5783 }
5784 
5785 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5786 					  enum amd_clockgating_state state)
5787 {
5788 	uint32_t msg_id, pp_state = 0;
5789 	uint32_t pp_support_state = 0;
5790 	void *pp_handle = adev->powerplay.pp_handle;
5791 
5792 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5793 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5794 			pp_support_state = PP_STATE_SUPPORT_LS;
5795 			pp_state = PP_STATE_LS;
5796 		}
5797 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5798 			pp_support_state |= PP_STATE_SUPPORT_CG;
5799 			pp_state |= PP_STATE_CG;
5800 		}
5801 		if (state == AMD_CG_STATE_UNGATE)
5802 			pp_state = 0;
5803 
5804 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5805 				PP_BLOCK_GFX_CG,
5806 				pp_support_state,
5807 				pp_state);
5808 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5809 	}
5810 
5811 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5812 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5813 			pp_support_state = PP_STATE_SUPPORT_LS;
5814 			pp_state = PP_STATE_LS;
5815 		}
5816 
5817 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5818 			pp_support_state |= PP_STATE_SUPPORT_CG;
5819 			pp_state |= PP_STATE_CG;
5820 		}
5821 
5822 		if (state == AMD_CG_STATE_UNGATE)
5823 			pp_state = 0;
5824 
5825 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5826 				PP_BLOCK_GFX_MG,
5827 				pp_support_state,
5828 				pp_state);
5829 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5830 	}
5831 
5832 	return 0;
5833 }
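
/*
 * Illustrative note (not in the original source): each request above is a
 * single message ID composed with PP_CG_MSG_ID(group, block, support, state).
 * With both CGCG and CGLS supported and state == AMD_CG_STATE_GATE, for
 * example, the first request is
 *
 *   PP_CG_MSG_ID(PP_GROUP_GFX, PP_BLOCK_GFX_CG,
 *                PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
 *                PP_STATE_CG | PP_STATE_LS);
 *
 * AMD_CG_STATE_UNGATE keeps the support bits but sends pp_state = 0, asking
 * the SMU to switch that block's gating off.
 */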
5834 
5835 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5836 					  enum amd_clockgating_state state)
5837 {
5838 
5839 	uint32_t msg_id, pp_state = 0;
5840 	uint32_t pp_support_state = 0;
5841 	void *pp_handle = adev->powerplay.pp_handle;
5842 
5843 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5844 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5845 			pp_support_state = PP_STATE_SUPPORT_LS;
5846 			pp_state = PP_STATE_LS;
5847 		}
5848 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5849 			pp_support_state |= PP_STATE_SUPPORT_CG;
5850 			pp_state |= PP_STATE_CG;
5851 		}
5852 		if (state == AMD_CG_STATE_UNGATE)
5853 			pp_state = 0;
5854 
5855 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5856 				PP_BLOCK_GFX_CG,
5857 				pp_support_state,
5858 				pp_state);
5859 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5860 	}
5861 
5862 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5863 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5864 			pp_support_state = PP_STATE_SUPPORT_LS;
5865 			pp_state = PP_STATE_LS;
5866 		}
5867 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5868 			pp_support_state |= PP_STATE_SUPPORT_CG;
5869 			pp_state |= PP_STATE_CG;
5870 		}
5871 		if (state == AMD_CG_STATE_UNGATE)
5872 			pp_state = 0;
5873 
5874 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5875 				PP_BLOCK_GFX_3D,
5876 				pp_support_state,
5877 				pp_state);
5878 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5879 	}
5880 
5881 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5882 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5883 			pp_support_state = PP_STATE_SUPPORT_LS;
5884 			pp_state = PP_STATE_LS;
5885 		}
5886 
5887 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5888 			pp_support_state |= PP_STATE_SUPPORT_CG;
5889 			pp_state |= PP_STATE_CG;
5890 		}
5891 
5892 		if (state == AMD_CG_STATE_UNGATE)
5893 			pp_state = 0;
5894 
5895 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5896 				PP_BLOCK_GFX_MG,
5897 				pp_support_state,
5898 				pp_state);
5899 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5900 	}
5901 
5902 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5903 		pp_support_state = PP_STATE_SUPPORT_LS;
5904 
5905 		if (state == AMD_CG_STATE_UNGATE)
5906 			pp_state = 0;
5907 		else
5908 			pp_state = PP_STATE_LS;
5909 
5910 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5911 				PP_BLOCK_GFX_RLC,
5912 				pp_support_state,
5913 				pp_state);
5914 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5915 	}
5916 
5917 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5918 		pp_support_state = PP_STATE_SUPPORT_LS;
5919 
5920 		if (state == AMD_CG_STATE_UNGATE)
5921 			pp_state = 0;
5922 		else
5923 			pp_state = PP_STATE_LS;
5924 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5925 			PP_BLOCK_GFX_CP,
5926 			pp_support_state,
5927 			pp_state);
5928 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5929 	}
5930 
5931 	return 0;
5932 }
5933 
5934 static int gfx_v8_0_set_clockgating_state(void *handle,
5935 					  enum amd_clockgating_state state)
5936 {
5937 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5938 
5939 	switch (adev->asic_type) {
5940 	case CHIP_FIJI:
5941 	case CHIP_CARRIZO:
5942 	case CHIP_STONEY:
5943 		gfx_v8_0_update_gfx_clock_gating(adev,
5944 						 state == AMD_CG_STATE_GATE);
5945 		break;
5946 	case CHIP_TONGA:
5947 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5948 		break;
5949 	case CHIP_POLARIS10:
5950 	case CHIP_POLARIS11:
5951 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5952 		break;
5953 	default:
5954 		break;
5955 	}
5956 	return 0;
5957 }
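
/*
 * Illustrative note: two clockgating paths exist on VI. Fiji, Carrizo and
 * Stoney program the RLC/CGTS registers directly through
 * gfx_v8_0_update_gfx_clock_gating(), while Tonga and Polaris forward the
 * request to the SMU via amd_set_clockgating_by_smu() message IDs.
 */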
5958 
5959 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5960 {
5961 	return ring->adev->wb.wb[ring->rptr_offs];
5962 }
5963 
5964 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5965 {
5966 	struct amdgpu_device *adev = ring->adev;
5967 
5968 	if (ring->use_doorbell)
5969 		/* XXX check if swapping is necessary on BE */
5970 		return ring->adev->wb.wb[ring->wptr_offs];
5971 	else
5972 		return RREG32(mmCP_RB0_WPTR);
5973 }
5974 
5975 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5976 {
5977 	struct amdgpu_device *adev = ring->adev;
5978 
5979 	if (ring->use_doorbell) {
5980 		/* XXX check if swapping is necessary on BE */
5981 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
5982 		WDOORBELL32(ring->doorbell_index, ring->wptr);
5983 	} else {
5984 		WREG32(mmCP_RB0_WPTR, ring->wptr);
5985 		(void)RREG32(mmCP_RB0_WPTR);
5986 	}
5987 }
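
/*
 * Illustrative note: on the doorbell path the new wptr is recorded in the
 * writeback slot (which gfx_v8_0_ring_get_wptr_gfx() reads back) before the
 * doorbell is rung; on the MMIO path the extra RREG32 of CP_RB0_WPTR reads
 * the register back to flush the posted write.
 */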
5988 
5989 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5990 {
5991 	u32 ref_and_mask, reg_mem_engine;
5992 
5993 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5994 		switch (ring->me) {
5995 		case 1:
5996 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5997 			break;
5998 		case 2:
5999 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6000 			break;
6001 		default:
6002 			return;
6003 		}
6004 		reg_mem_engine = 0;
6005 	} else {
6006 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6007 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6008 	}
6009 
6010 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6011 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6012 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6013 				 reg_mem_engine));
6014 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6015 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6016 	amdgpu_ring_write(ring, ref_and_mask);
6017 	amdgpu_ring_write(ring, ref_and_mask);
6018 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6019 }
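
/*
 * Illustrative note: the HDP flush above is a 7-dword WAIT_REG_MEM packet in
 * write-then-wait mode:
 *
 *   dw0: PACKET3(PACKET3_WAIT_REG_MEM, 5)
 *   dw1: operation (write, wait, write) | function (==) | engine
 *   dw2: mmGPU_HDP_FLUSH_REQ  (register written with ref_and_mask)
 *   dw3: mmGPU_HDP_FLUSH_DONE (register polled against ref_and_mask)
 *   dw4: reference value
 *   dw5: mask
 *   dw6: poll interval (0x20)
 *
 * which matches the 7 dwords budgeted for it in emit_frame_size below.
 */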
6020 
6021 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6022 {
6023 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6024 	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6025 		EVENT_INDEX(4));
6026 
6027 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6028 	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6029 		EVENT_INDEX(0));
6030 }
6031 
6033 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6034 {
6035 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6036 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6037 				 WRITE_DATA_DST_SEL(0) |
6038 				 WR_CONFIRM));
6039 	amdgpu_ring_write(ring, mmHDP_DEBUG0);
6040 	amdgpu_ring_write(ring, 0);
6041 	amdgpu_ring_write(ring, 1);
6043 }
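
/*
 * Illustrative note: VI has no dedicated HDP-invalidate register reachable
 * from the command stream, so the WRITE_DATA of 1 to HDP_DEBUG0 above is
 * used for its side effect of invalidating the HDP read cache.
 */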
6044 
6045 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6046 				      struct amdgpu_ib *ib,
6047 				      unsigned vm_id, bool ctx_switch)
6048 {
6049 	u32 header, control = 0;
6050 
6051 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6052 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6053 	else
6054 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6055 
6056 	control |= ib->length_dw | (vm_id << 24);
6057 
6058 	amdgpu_ring_write(ring, header);
6059 	amdgpu_ring_write(ring,
6060 #ifdef __BIG_ENDIAN
6061 			  (2 << 0) |
6062 #endif
6063 			  (ib->gpu_addr & 0xFFFFFFFC));
6064 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6065 	amdgpu_ring_write(ring, control);
6066 }
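
/*
 * Illustrative note: an IB launch is 4 dwords (header, address lo, address
 * hi, control), matching .emit_ib_size below; control packs ib->length_dw in
 * the low bits and the VMID in bits 31:24. IBs flagged AMDGPU_IB_FLAG_CE are
 * emitted with INDIRECT_BUFFER_CONST so the constant engine, rather than the
 * drawing engine, fetches them.
 */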
6067 
6068 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6069 					  struct amdgpu_ib *ib,
6070 					  unsigned vm_id, bool ctx_switch)
6071 {
6072 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6073 
6074 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6075 	amdgpu_ring_write(ring,
6076 #ifdef __BIG_ENDIAN
6077 				(2 << 0) |
6078 #endif
6079 				(ib->gpu_addr & 0xFFFFFFFC));
6080 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6081 	amdgpu_ring_write(ring, control);
6082 }
6083 
6084 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6085 					 u64 seq, unsigned flags)
6086 {
6087 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6088 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6089 
6090 	/* EVENT_WRITE_EOP - flush caches, send int */
6091 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6092 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6093 				 EOP_TC_ACTION_EN |
6094 				 EOP_TC_WB_ACTION_EN |
6095 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6096 				 EVENT_INDEX(5)));
6097 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6098 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6099 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6100 	amdgpu_ring_write(ring, lower_32_bits(seq));
6101 	amdgpu_ring_write(ring, upper_32_bits(seq));
6103 }
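
/*
 * Illustrative note: DATA_SEL selects what the EOP event writes back
 * (1 = low 32 bits of seq, 2 = the full 64-bit seq) and INT_SEL(2)
 * additionally raises the EOP interrupt that ends up in
 * amdgpu_fence_process(); both are chosen per fence through
 * AMDGPU_FENCE_FLAG_64BIT and AMDGPU_FENCE_FLAG_INT.
 */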
6104 
6105 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6106 {
6107 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6108 	uint32_t seq = ring->fence_drv.sync_seq;
6109 	uint64_t addr = ring->fence_drv.gpu_addr;
6110 
6111 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6112 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6113 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6114 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6115 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6116 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6117 	amdgpu_ring_write(ring, seq);
6118 	amdgpu_ring_write(ring, 0xffffffff);
6119 	amdgpu_ring_write(ring, 4); /* poll interval */
6120 }
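
/*
 * Illustrative note: the pipeline sync is a WAIT_REG_MEM on memory
 * (MEM_SPACE(1)) that polls the fence writeback address until it equals
 * sync_seq, i.e. until everything previously scheduled on the ring has
 * signalled; on gfx rings the wait runs on the PFP, on compute rings on
 * the ME.
 */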
6121 
6122 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6123 					unsigned vm_id, uint64_t pd_addr)
6124 {
6125 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6126 
6127 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6128 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6129 				 WRITE_DATA_DST_SEL(0) |
6130 				 WR_CONFIRM));
6131 	if (vm_id < 8) {
6132 		amdgpu_ring_write(ring,
6133 				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6134 	} else {
6135 		amdgpu_ring_write(ring,
6136 				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6137 	}
6138 	amdgpu_ring_write(ring, 0);
6139 	amdgpu_ring_write(ring, pd_addr >> 12);
6140 
6141 	/* bits 0-15 are the VM contexts0-15 */
6142 	/* invalidate the cache */
6143 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6144 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6145 				 WRITE_DATA_DST_SEL(0)));
6146 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6147 	amdgpu_ring_write(ring, 0);
6148 	amdgpu_ring_write(ring, 1 << vm_id);
6149 
6150 	/* wait for the invalidate to complete */
6151 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6152 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6153 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6154 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6155 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6156 	amdgpu_ring_write(ring, 0);
6157 	amdgpu_ring_write(ring, 0); /* ref */
6158 	amdgpu_ring_write(ring, 0); /* mask */
6159 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6160 
6161 	/* compute doesn't have PFP */
6162 	if (usepfp) {
6163 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6164 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6165 		amdgpu_ring_write(ring, 0x0);
6166 		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
6167 		amdgpu_ring_insert_nop(ring, 128);
6168 	}
6169 }
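
/*
 * Illustrative note: the flush above performs, in order:
 *   1) WRITE_DATA of pd_addr >> 12 into the per-VMID page table base
 *      register (VMIDs 0-7 and 8-15 live in separate register banks, hence
 *      the vm_id < 8 split),
 *   2) WRITE_DATA of (1 << vm_id) to VM_INVALIDATE_REQUEST,
 *   3) a WAIT_REG_MEM on VM_INVALIDATE_REQUEST (function "always") that
 *      gives the invalidate time to complete, and
 *   4) on gfx rings only, PFP_SYNC_ME plus 128 NOPs so the CE cannot touch
 *      the VM before the flush has finished.
 */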
6170 
6171 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6172 {
6173 	return ring->adev->wb.wb[ring->wptr_offs];
6174 }
6175 
6176 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6177 {
6178 	struct amdgpu_device *adev = ring->adev;
6179 
6180 	/* XXX check if swapping is necessary on BE */
6181 	adev->wb.wb[ring->wptr_offs] = ring->wptr;
6182 	WDOORBELL32(ring->doorbell_index, ring->wptr);
6183 }
6184 
6185 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6186 					     u64 addr, u64 seq,
6187 					     unsigned flags)
6188 {
6189 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6190 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6191 
6192 	/* RELEASE_MEM - flush caches, send int */
6193 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6194 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6195 				 EOP_TC_ACTION_EN |
6196 				 EOP_TC_WB_ACTION_EN |
6197 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6198 				 EVENT_INDEX(5)));
6199 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6200 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6201 	amdgpu_ring_write(ring, upper_32_bits(addr));
6202 	amdgpu_ring_write(ring, lower_32_bits(seq));
6203 	amdgpu_ring_write(ring, upper_32_bits(seq));
6204 }
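
/*
 * Illustrative note: compute queues signal fences with RELEASE_MEM instead
 * of EVENT_WRITE_EOP; the packet is 7 dwords, which is the per-fence cost
 * budgeted in the compute ring's emit_frame_size below.
 */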
6205 
6206 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6207 {
6208 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6209 	amdgpu_ring_write(ring, 0);
6210 }
6211 
6212 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6213 {
6214 	uint32_t dw2 = 0;
6215 
6216 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6217 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6218 		gfx_v8_0_ring_emit_vgt_flush(ring);
6219 		/* set load_global_config & load_global_uconfig */
6220 		dw2 |= 0x8001;
6221 		/* set load_cs_sh_regs */
6222 		dw2 |= 0x01000000;
6223 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6224 		dw2 |= 0x10002;
6225 
6226 		/* set load_ce_ram if preamble presented */
6227 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6228 			dw2 |= 0x10000000;
6229 	} else {
6230 		/* still load_ce_ram if this is the first time a preamble is
6231 		 * presented, even though no context switch happens.
6232 		 */
6233 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6234 			dw2 |= 0x10000000;
6235 	}
6236 
6237 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6238 	amdgpu_ring_write(ring, dw2);
6239 	amdgpu_ring_write(ring, 0);
6240 }
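
/*
 * Illustrative note: CONTEXT_CONTROL is 3 dwords (header, the dw2 load_*
 * enables built above, and a trailing 0); together with the optional 4-dword
 * VGT flush on a context switch this accounts for the "3 + 4" listed for
 * gfx_v8_ring_emit_cntxcntl in emit_frame_size below.
 */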
6241 
6242 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6243 						 enum amdgpu_interrupt_state state)
6244 {
6245 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6246 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6247 }
6248 
6249 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6250 						     int me, int pipe,
6251 						     enum amdgpu_interrupt_state state)
6252 {
6253 	/*
6254 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6255 	 * handles the setting of interrupts for this specific pipe. All other
6256 	 * pipes' interrupts are set by amdkfd.
6257 	 */
6258 
6259 	if (me == 1) {
6260 		switch (pipe) {
6261 		case 0:
6262 			break;
6263 		default:
6264 			DRM_DEBUG("invalid pipe %d\n", pipe);
6265 			return;
6266 		}
6267 	} else {
6268 		DRM_DEBUG("invalid me %d\n", me);
6269 		return;
6270 	}
6271 
6272 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6273 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6274 }
6275 
6276 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6277 					     struct amdgpu_irq_src *source,
6278 					     unsigned type,
6279 					     enum amdgpu_interrupt_state state)
6280 {
6281 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6282 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6283 
6284 	return 0;
6285 }
6286 
6287 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6288 					      struct amdgpu_irq_src *source,
6289 					      unsigned type,
6290 					      enum amdgpu_interrupt_state state)
6291 {
6292 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6293 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6294 
6295 	return 0;
6296 }
6297 
6298 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6299 					    struct amdgpu_irq_src *src,
6300 					    unsigned type,
6301 					    enum amdgpu_interrupt_state state)
6302 {
6303 	switch (type) {
6304 	case AMDGPU_CP_IRQ_GFX_EOP:
6305 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6306 		break;
6307 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6308 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6309 		break;
6310 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6311 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6312 		break;
6313 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6314 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6315 		break;
6316 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6317 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6318 		break;
6319 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6320 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6321 		break;
6322 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6323 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6324 		break;
6325 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6326 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6327 		break;
6328 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6329 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6330 		break;
6331 	default:
6332 		break;
6333 	}
6334 	return 0;
6335 }
6336 
6337 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6338 			    struct amdgpu_irq_src *source,
6339 			    struct amdgpu_iv_entry *entry)
6340 {
6341 	int i;
6342 	u8 me_id, pipe_id, queue_id;
6343 	struct amdgpu_ring *ring;
6344 
6345 	DRM_DEBUG("IH: CP EOP\n");
6346 	me_id = (entry->ring_id & 0x0c) >> 2;
6347 	pipe_id = (entry->ring_id & 0x03) >> 0;
6348 	queue_id = (entry->ring_id & 0x70) >> 4;
6349 
6350 	switch (me_id) {
6351 	case 0:
6352 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6353 		break;
6354 	case 1:
6355 	case 2:
6356 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6357 			ring = &adev->gfx.compute_ring[i];
6358 			/* Per-queue interrupt is supported for MEC starting from VI.
6359 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6360 			 */
6361 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6362 				amdgpu_fence_process(ring);
6363 		}
6364 		break;
6365 	}
6366 	return 0;
6367 }
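
/*
 * Illustrative sketch (hypothetical helper, not used by the driver): the IV
 * entry->ring_id decoded above packs queue into bits [6:4], me into [3:2]
 * and pipe into [1:0]; e.g. ring_id 0x15 decodes to me 1, pipe 1, queue 1.
 */
static inline void gfx_v8_0_decode_eop_ring_id(u8 ring_id, u8 *me_id,
					       u8 *pipe_id, u8 *queue_id)
{
	*me_id = (ring_id & 0x0c) >> 2;
	*pipe_id = (ring_id & 0x03) >> 0;
	*queue_id = (ring_id & 0x70) >> 4;
}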
6368 
6369 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6370 				 struct amdgpu_irq_src *source,
6371 				 struct amdgpu_iv_entry *entry)
6372 {
6373 	DRM_ERROR("Illegal register access in command stream\n");
6374 	schedule_work(&adev->reset_work);
6375 	return 0;
6376 }
6377 
6378 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6379 				  struct amdgpu_irq_src *source,
6380 				  struct amdgpu_iv_entry *entry)
6381 {
6382 	DRM_ERROR("Illegal instruction in command stream\n");
6383 	schedule_work(&adev->reset_work);
6384 	return 0;
6385 }
6386 
6387 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6388 	.name = "gfx_v8_0",
6389 	.early_init = gfx_v8_0_early_init,
6390 	.late_init = gfx_v8_0_late_init,
6391 	.sw_init = gfx_v8_0_sw_init,
6392 	.sw_fini = gfx_v8_0_sw_fini,
6393 	.hw_init = gfx_v8_0_hw_init,
6394 	.hw_fini = gfx_v8_0_hw_fini,
6395 	.suspend = gfx_v8_0_suspend,
6396 	.resume = gfx_v8_0_resume,
6397 	.is_idle = gfx_v8_0_is_idle,
6398 	.wait_for_idle = gfx_v8_0_wait_for_idle,
6399 	.check_soft_reset = gfx_v8_0_check_soft_reset,
6400 	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6401 	.soft_reset = gfx_v8_0_soft_reset,
6402 	.post_soft_reset = gfx_v8_0_post_soft_reset,
6403 	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6404 	.set_powergating_state = gfx_v8_0_set_powergating_state,
6405 };
6406 
6407 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6408 	.type = AMDGPU_RING_TYPE_GFX,
6409 	.align_mask = 0xff,
6410 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6411 	.get_rptr = gfx_v8_0_ring_get_rptr,
6412 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6413 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6414 	.emit_frame_size =
6415 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6416 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6417 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6418 		6 + 6 + 6 + /* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6419 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6420 		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6421 		2 + /* gfx_v8_ring_emit_sb */
6422 		3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
6423 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6424 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6425 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6426 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6427 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6428 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6429 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6430 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6431 	.test_ring = gfx_v8_0_ring_test_ring,
6432 	.test_ib = gfx_v8_0_ring_test_ib,
6433 	.insert_nop = amdgpu_ring_insert_nop,
6434 	.pad_ib = amdgpu_ring_generic_pad_ib,
6435 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6436 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6437 };
6438 
6439 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6440 	.type = AMDGPU_RING_TYPE_COMPUTE,
6441 	.align_mask = 0xff,
6442 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6443 	.get_rptr = gfx_v8_0_ring_get_rptr,
6444 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6445 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6446 	.emit_frame_size =
6447 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6448 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6449 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6450 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6451 		17 + /* gfx_v8_0_ring_emit_vm_flush */
6452 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6453 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
6454 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6455 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6456 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6457 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6458 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6459 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6460 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6461 	.test_ring = gfx_v8_0_ring_test_ring,
6462 	.test_ib = gfx_v8_0_ring_test_ib,
6463 	.insert_nop = amdgpu_ring_insert_nop,
6464 	.pad_ib = amdgpu_ring_generic_pad_ib,
6465 };
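
/*
 * Illustrative note: .emit_frame_size is the worst-case number of ring
 * dwords a single frame may emit and is used when sizing ring buffer
 * allocations; the sums above come to 213 dwords for gfx and 77 for
 * compute. .emit_ib_size (4) is the additional cost of each IB launch.
 */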
6466 
6467 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6468 {
6469 	int i;
6470 
6471 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6472 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6473 
6474 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6475 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6476 }
6477 
6478 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6479 	.set = gfx_v8_0_set_eop_interrupt_state,
6480 	.process = gfx_v8_0_eop_irq,
6481 };
6482 
6483 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6484 	.set = gfx_v8_0_set_priv_reg_fault_state,
6485 	.process = gfx_v8_0_priv_reg_irq,
6486 };
6487 
6488 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6489 	.set = gfx_v8_0_set_priv_inst_fault_state,
6490 	.process = gfx_v8_0_priv_inst_irq,
6491 };
6492 
6493 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6494 {
6495 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6496 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6497 
6498 	adev->gfx.priv_reg_irq.num_types = 1;
6499 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6500 
6501 	adev->gfx.priv_inst_irq.num_types = 1;
6502 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6503 }
6504 
6505 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6506 {
6507 	switch (adev->asic_type) {
6508 	case CHIP_TOPAZ:
6509 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6510 		break;
6511 	case CHIP_STONEY:
6512 	case CHIP_CARRIZO:
6513 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6514 		break;
6515 	default:
6516 		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6517 		break;
6518 	}
6519 }
6520 
6521 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6522 {
6523 	/* init ASIC GDS info */
6524 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6525 	adev->gds.gws.total_size = 64;
6526 	adev->gds.oa.total_size = 16;
6527 
6528 	if (adev->gds.mem.total_size == 64 * 1024) {
6529 		adev->gds.mem.gfx_partition_size = 4096;
6530 		adev->gds.mem.cs_partition_size = 4096;
6531 
6532 		adev->gds.gws.gfx_partition_size = 4;
6533 		adev->gds.gws.cs_partition_size = 4;
6534 
6535 		adev->gds.oa.gfx_partition_size = 4;
6536 		adev->gds.oa.cs_partition_size = 1;
6537 	} else {
6538 		adev->gds.mem.gfx_partition_size = 1024;
6539 		adev->gds.mem.cs_partition_size = 1024;
6540 
6541 		adev->gds.gws.gfx_partition_size = 16;
6542 		adev->gds.gws.cs_partition_size = 16;
6543 
6544 		adev->gds.oa.gfx_partition_size = 4;
6545 		adev->gds.oa.cs_partition_size = 4;
6546 	}
6547 }
6548 
6549 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6550 						 u32 bitmap)
6551 {
6552 	u32 data;
6553 
6554 	if (!bitmap)
6555 		return;
6556 
6557 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6558 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6559 
6560 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6561 }
6562 
6563 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6564 {
6565 	u32 data, mask;
6566 
6567 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6568 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6569 
6570 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6571 
6572 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6573 }
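
/*
 * Illustrative note: INACTIVE_CUS is a disable mask, so it is inverted and
 * clipped to max_cu_per_sh. With 8 CUs per SH and INACTIVE_CUS = 0x03, for
 * example, the active bitmap is ~0x03 & 0xff = 0xfc (CUs 2-7 active).
 */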
6574 
6575 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6576 {
6577 	int i, j, k, counter, active_cu_number = 0;
6578 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6579 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6580 	unsigned disable_masks[4 * 2];
6581 
6582 	memset(cu_info, 0, sizeof(*cu_info));
6583 
6584 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6585 
6586 	mutex_lock(&adev->grbm_idx_mutex);
6587 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6588 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6589 			mask = 1;
6590 			ao_bitmap = 0;
6591 			counter = 0;
6592 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6593 			if (i < 4 && j < 2)
6594 				gfx_v8_0_set_user_cu_inactive_bitmap(
6595 					adev, disable_masks[i * 2 + j]);
6596 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6597 			cu_info->bitmap[i][j] = bitmap;
6598 
6599 			for (k = 0; k < 16; k++) {
6600 				if (bitmap & mask) {
6601 					if (counter < 2)
6602 						ao_bitmap |= mask;
6603 					counter++;
6604 				}
6605 				mask <<= 1;
6606 			}
6607 			active_cu_number += counter;
6608 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6609 		}
6610 	}
6611 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6612 	mutex_unlock(&adev->grbm_idx_mutex);
6613 
6614 	cu_info->number = active_cu_number;
6615 	cu_info->ao_cu_mask = ao_cu_mask;
6616 }
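
/*
 * Illustrative note: the "always on" (ao) mask keeps the first two active
 * CUs of each SH (the counter < 2 test above); ao_cu_mask then packs those
 * per-SH bytes at bit offset i * 16 + j * 8 for SE i, SH j.
 */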
6617 
6618 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
6619 {
6620 	.type = AMD_IP_BLOCK_TYPE_GFX,
6621 	.major = 8,
6622 	.minor = 0,
6623 	.rev = 0,
6624 	.funcs = &gfx_v8_0_ip_funcs,
6625 };
6626 
6627 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
6628 {
6629 	.type = AMD_IP_BLOCK_TYPE_GFX,
6630 	.major = 8,
6631 	.minor = 1,
6632 	.rev = 0,
6633 	.funcs = &gfx_v8_0_ip_funcs,
6634 };
6635