/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
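
/*
 * Each helper above shifts a field value to its __SHIFT position so the
 * results can be OR'd into a single GB_TILE_MODEn / GB_MACROTILE_MODEn
 * word. An illustrative composition (enum values from gfx_8_0_enum.h,
 * shown as an example rather than a recommended setting) would be:
 *
 *	ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 *	PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
 *	TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 *	MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)
 */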

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

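/*
 * The golden-settings tables below are flat arrays of {offset, and_mask,
 * or_value} triplets consumed by amdgpu_program_register_sequence().
 * Broadly speaking (a sketch of that helper's read-modify-write contract,
 * not a second implementation), each triplet is applied as:
 *
 *	tmp = RREG32(offset);
 *	tmp &= ~and_mask;
 *	tmp |= or_value;
 *	WREG32(offset, tmp);
 *
 * with an and_mask of 0xffffffff treated as a plain overwrite.
 */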
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

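/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, push a
 * SET_UCONFIG_REG packet through the ring that rewrites it to 0xDEADBEEF,
 * then poll for up to adev->usec_timeout microseconds until the CP has
 * consumed the packet.
 */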
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

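/*
 * IB variant of the ring test: the same scratch-register rewrite is
 * packaged in a 3-dword indirect buffer, and completion is observed via
 * the scheduler fence rather than by polling. For reference, the type-3
 * PM4 header built by PACKET3() (see vid.h) is laid out roughly as
 * [31:30] packet type, [29:16] payload dword count minus one (1 here:
 * register offset plus value) and [15:8] IT opcode.
 */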
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

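/*
 * Fetch and validate the CP and RLC microcode for the detected ASIC. The
 * files follow the "amdgpu/<chip>_<block>.bin" convention declared by the
 * MODULE_FIRMWARE() lines above, e.g. "amdgpu/tonga_pfp.bin", and each
 * image carries a versioned header that is parsed below.
 */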
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released in feature version 46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else {
		adev->virt.chained_ib_support = false;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

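/*
 * Build the clear-state buffer (CSB): a PM4 stream the RLC replays to put
 * the context registers into a known state. The stream written below is
 * PREAMBLE_CNTL (begin clear state), CONTEXT_CONTROL, one SET_CONTEXT_REG
 * run per SECT_CONTEXT extent, the raster configuration, PREAMBLE_CNTL
 * (end clear state) and finally CLEAR_STATE.
 */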
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

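/*
 * Concatenate the CP jump tables (JT) of each micro engine (CE, PFP, ME,
 * MEC, plus MEC2 on Carrizo) into the single cp_table BO; each firmware
 * header supplies its table's offset and size in dwords.
 */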
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

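/*
 * Set up the RLC clear-state buffer and, on the APUs (Carrizo/Stoney),
 * the CP jump-table buffer. Both follow the same BO lifecycle, which the
 * error paths below unwind in reverse (a sketch of the pattern, not
 * additional driver code):
 *
 *	amdgpu_bo_create(...);			allocate the BO
 *	amdgpu_bo_reserve(bo, false);		lock it
 *	amdgpu_bo_pin(bo, domain, &gpu_addr);	fix its GPU address
 *	amdgpu_bo_kmap(bo, &cpu_ptr);		map it for the CPU
 *	... fill the buffer through cpu_ptr ...
 *	amdgpu_bo_kunmap(bo);
 *	amdgpu_bo_unreserve(bo);
 */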
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
1292 				gfx_v8_0_rlc_fini(adev);
1293 				return r;
1294 			}
1295 		}
1296 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1297 		if (unlikely(r != 0)) {
1298 			gfx_v8_0_rlc_fini(adev);
1299 			return r;
1300 		}
1301 		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1302 				  &adev->gfx.rlc.clear_state_gpu_addr);
1303 		if (r) {
1304 			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1305 			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1306 			gfx_v8_0_rlc_fini(adev);
1307 			return r;
1308 		}
1309 
1310 		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1311 		if (r) {
1312 			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1313 			gfx_v8_0_rlc_fini(adev);
1314 			return r;
1315 		}
1316 		/* set up the cs buffer */
1317 		dst_ptr = adev->gfx.rlc.cs_ptr;
1318 		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1319 		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1320 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1321 	}
1322 
1323 	if ((adev->asic_type == CHIP_CARRIZO) ||
1324 	    (adev->asic_type == CHIP_STONEY)) {
1325 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1326 		if (adev->gfx.rlc.cp_table_obj == NULL) {
1327 			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1328 					     AMDGPU_GEM_DOMAIN_VRAM,
1329 					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1330 					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1331 					     NULL, NULL,
1332 					     &adev->gfx.rlc.cp_table_obj);
1333 			if (r) {
1334 				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1335 				return r;
1336 			}
1337 		}
1338 
1339 		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1340 		if (unlikely(r != 0)) {
1341 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1342 			return r;
1343 		}
1344 		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1345 				  &adev->gfx.rlc.cp_table_gpu_addr);
1346 		if (r) {
1347 			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1348 			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1349 			return r;
1350 		}
1351 		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1352 		if (r) {
1353 			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1354 			return r;
1355 		}
1356 
1357 		cz_init_cp_jump_table(adev);
1358 
1359 		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1360 		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1361 	}
1362 
1363 	return 0;
1364 }
1365 
1366 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1367 {
1368 	int r;
1369 
1370 	if (adev->gfx.mec.hpd_eop_obj) {
1371 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
1372 		if (unlikely(r != 0))
1373 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1374 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1375 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1376 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1377 		adev->gfx.mec.hpd_eop_obj = NULL;
1378 	}
1379 }
1380 
1381 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1382 				  struct amdgpu_ring *ring,
1383 				  struct amdgpu_irq_src *irq)
1384 {
1385 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1386 	int r = 0;
1387 
1388 	mutex_init(&kiq->ring_mutex);
1389 
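	/* writeback slot where register reads issued through the KIQ land */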
1390 	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1391 	if (r)
1392 		return r;
1393 
1394 	ring->adev = NULL;
1395 	ring->ring_obj = NULL;
1396 	ring->use_doorbell = true;
1397 	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
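	/*
	 * Place the KIQ on MEC2 pipe 0 when a second MEC is present,
	 * otherwise on MEC1 pipe 1; MEC1 pipe 0 backs the regular
	 * compute rings.
	 */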
1398 	if (adev->gfx.mec2_fw) {
1399 		ring->me = 2;
1400 		ring->pipe = 0;
1401 	} else {
1402 		ring->me = 1;
1403 		ring->pipe = 1;
1404 	}
1405 
1406 	ring->queue = 0;
1407 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
1408 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1409 	r = amdgpu_ring_init(adev, ring, 1024,
1410 			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1411 	if (r)
1412 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1413 
1414 	return r;
1415 }
1416 
1417 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1418 				   struct amdgpu_irq_src *irq)
1419 {
1420 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
1421 	amdgpu_ring_fini(ring);
1422 }
1423 
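/* size of the EOP buffer reserved per compute queue */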
1424 #define MEC_HPD_SIZE 2048
1425 
1426 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1427 {
1428 	int r;
1429 	u32 *hpd;
1430 
1431 	/*
1432 	 * we assign only 1 pipe because all other pipes will
1433 	 * be handled by KFD
1434 	 */
1435 	adev->gfx.mec.num_mec = 1;
1436 	adev->gfx.mec.num_pipe = 1;
1437 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1438 
1439 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1440 		r = amdgpu_bo_create(adev,
1441 				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1442 				     PAGE_SIZE, true,
1443 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1444 				     &adev->gfx.mec.hpd_eop_obj);
1445 		if (r) {
1446 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1447 			return r;
1448 		}
1449 	}
1450 
1451 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1452 	if (unlikely(r != 0)) {
1453 		gfx_v8_0_mec_fini(adev);
1454 		return r;
1455 	}
1456 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1457 			  &adev->gfx.mec.hpd_eop_gpu_addr);
1458 	if (r) {
1459 		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1460 		gfx_v8_0_mec_fini(adev);
1461 		return r;
1462 	}
1463 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1464 	if (r) {
1465 		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1466 		gfx_v8_0_mec_fini(adev);
1467 		return r;
1468 	}
1469 
1470 	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1471 
1472 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1473 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1474 
1475 	return 0;
1476 }
1477 
1478 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1479 {
1480 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1481 
1482 	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
1483 }
1484 
1485 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1486 {
1487 	int r;
1488 	u32 *hpd;
1489 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1490 
1491 	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1492 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1493 				    &kiq->eop_gpu_addr, (void **)&hpd);
1494 	if (r) {
1495 		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1496 		return r;
1497 	}
1498 
1499 	memset(hpd, 0, MEC_HPD_SIZE);
1500 
1501 	r = amdgpu_bo_reserve(kiq->eop_obj, true);
1502 	if (unlikely(r != 0))
1503 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
1504 	amdgpu_bo_kunmap(kiq->eop_obj);
1505 	amdgpu_bo_unreserve(kiq->eop_obj);
1506 
1507 	return 0;
1508 }
1509 
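/*
 * GCN3 machine code: a run of v_mov_b32 writes filling v0-v63 from
 * s0-s9, ending in s_barrier and s_endpgm.
 */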
1510 static const u32 vgpr_init_compute_shader[] =
1511 {
1512 	0x7e000209, 0x7e020208,
1513 	0x7e040207, 0x7e060206,
1514 	0x7e080205, 0x7e0a0204,
1515 	0x7e0c0203, 0x7e0e0202,
1516 	0x7e100201, 0x7e120200,
1517 	0x7e140209, 0x7e160208,
1518 	0x7e180207, 0x7e1a0206,
1519 	0x7e1c0205, 0x7e1e0204,
1520 	0x7e200203, 0x7e220202,
1521 	0x7e240201, 0x7e260200,
1522 	0x7e280209, 0x7e2a0208,
1523 	0x7e2c0207, 0x7e2e0206,
1524 	0x7e300205, 0x7e320204,
1525 	0x7e340203, 0x7e360202,
1526 	0x7e380201, 0x7e3a0200,
1527 	0x7e3c0209, 0x7e3e0208,
1528 	0x7e400207, 0x7e420206,
1529 	0x7e440205, 0x7e460204,
1530 	0x7e480203, 0x7e4a0202,
1531 	0x7e4c0201, 0x7e4e0200,
1532 	0x7e500209, 0x7e520208,
1533 	0x7e540207, 0x7e560206,
1534 	0x7e580205, 0x7e5a0204,
1535 	0x7e5c0203, 0x7e5e0202,
1536 	0x7e600201, 0x7e620200,
1537 	0x7e640209, 0x7e660208,
1538 	0x7e680207, 0x7e6a0206,
1539 	0x7e6c0205, 0x7e6e0204,
1540 	0x7e700203, 0x7e720202,
1541 	0x7e740201, 0x7e760200,
1542 	0x7e780209, 0x7e7a0208,
1543 	0x7e7c0207, 0x7e7e0206,
1544 	0xbf8a0000, 0xbf810000,
1545 };
1546 
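/*
 * GCN3 machine code: a run of s_mov_b32 copies spreading s0-s9
 * across the SGPR file, ending in s_barrier and s_endpgm.
 */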
1547 static const u32 sgpr_init_compute_shader[] =
1548 {
1549 	0xbe8a0100, 0xbe8c0102,
1550 	0xbe8e0104, 0xbe900106,
1551 	0xbe920108, 0xbe940100,
1552 	0xbe960102, 0xbe980104,
1553 	0xbe9a0106, 0xbe9c0108,
1554 	0xbe9e0100, 0xbea00102,
1555 	0xbea20104, 0xbea40106,
1556 	0xbea60108, 0xbea80100,
1557 	0xbeaa0102, 0xbeac0104,
1558 	0xbeae0106, 0xbeb00108,
1559 	0xbeb20100, 0xbeb40102,
1560 	0xbeb60104, 0xbeb80106,
1561 	0xbeba0108, 0xbebc0100,
1562 	0xbebe0102, 0xbec00104,
1563 	0xbec20106, 0xbec40108,
1564 	0xbec60100, 0xbec80102,
1565 	0xbee60004, 0xbee70005,
1566 	0xbeea0006, 0xbeeb0007,
1567 	0xbee80008, 0xbee90009,
1568 	0xbefc0000, 0xbf8a0000,
1569 	0xbf810000, 0x00000000,
1570 };
1571 
1572 static const u32 vgpr_init_regs[] =
1573 {
1574 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1575 	mmCOMPUTE_RESOURCE_LIMITS, 0,
1576 	mmCOMPUTE_NUM_THREAD_X, 256*4,
1577 	mmCOMPUTE_NUM_THREAD_Y, 1,
1578 	mmCOMPUTE_NUM_THREAD_Z, 1,
1579 	mmCOMPUTE_PGM_RSRC2, 20,
1580 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1581 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1582 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1583 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1584 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1585 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1586 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1587 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1588 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1589 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1590 };
1591 
1592 static const u32 sgpr1_init_regs[] =
1593 {
1594 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1595 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1596 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1597 	mmCOMPUTE_NUM_THREAD_Y, 1,
1598 	mmCOMPUTE_NUM_THREAD_Z, 1,
1599 	mmCOMPUTE_PGM_RSRC2, 20,
1600 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1601 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1602 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1603 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1604 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1605 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1606 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1607 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1608 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1609 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1610 };
1611 
1612 static const u32 sgpr2_init_regs[] =
1613 {
1614 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1615 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1616 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1617 	mmCOMPUTE_NUM_THREAD_Y, 1,
1618 	mmCOMPUTE_NUM_THREAD_Z, 1,
1619 	mmCOMPUTE_PGM_RSRC2, 20,
1620 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1621 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1622 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1623 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1624 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1625 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1626 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1627 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1628 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1629 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1630 };
1631 
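/* SEC/DED (single/double bit) EDC error counters, read below to clear them */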
1632 static const u32 sec_ded_counter_registers[] =
1633 {
1634 	mmCPC_EDC_ATC_CNT,
1635 	mmCPC_EDC_SCRATCH_CNT,
1636 	mmCPC_EDC_UCODE_CNT,
1637 	mmCPF_EDC_ATC_CNT,
1638 	mmCPF_EDC_ROQ_CNT,
1639 	mmCPF_EDC_TAG_CNT,
1640 	mmCPG_EDC_ATC_CNT,
1641 	mmCPG_EDC_DMA_CNT,
1642 	mmCPG_EDC_TAG_CNT,
1643 	mmDC_EDC_CSINVOC_CNT,
1644 	mmDC_EDC_RESTORE_CNT,
1645 	mmDC_EDC_STATE_CNT,
1646 	mmGDS_EDC_CNT,
1647 	mmGDS_EDC_GRBM_CNT,
1648 	mmGDS_EDC_OA_DED,
1649 	mmSPI_EDC_CNT,
1650 	mmSQC_ATC_EDC_GATCL1_CNT,
1651 	mmSQC_EDC_CNT,
1652 	mmSQ_EDC_DED_CNT,
1653 	mmSQ_EDC_INFO,
1654 	mmSQ_EDC_SEC_CNT,
1655 	mmTCC_EDC_CNT,
1656 	mmTCP_ATC_EDC_GATCL1_CNT,
1657 	mmTCP_EDC_CNT,
1658 	mmTD_EDC_CNT
1659 };
1660 
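/*
 * Run the VGPR shader once and the SGPR shader twice (with
 * complementary STATIC_THREAD_MGMT_SE0 masks) so every GPR is
 * written with a known value before the EDC (ECC) counters are
 * configured and cleared; only needed on Carrizo.
 */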
1661 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1662 {
1663 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1664 	struct amdgpu_ib ib;
1665 	struct dma_fence *f = NULL;
1666 	int r, i;
1667 	u32 tmp;
1668 	unsigned total_size, vgpr_offset, sgpr_offset;
1669 	u64 gpu_addr;
1670 
1671 	/* only supported on CZ */
1672 	if (adev->asic_type != CHIP_CARRIZO)
1673 		return 0;
1674 
1675 	/* bail if the compute ring is not ready */
1676 	if (!ring->ready)
1677 		return 0;
1678 
1679 	tmp = RREG32(mmGB_EDC_MODE);
1680 	WREG32(mmGB_EDC_MODE, 0);
1681 
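	/*
	 * Per dispatch: 3 dwords for each SET_SH_REG reg/value pair,
	 * 4 for the COMPUTE_PGM_LO/HI write, 5 for DISPATCH_DIRECT,
	 * 2 for the EVENT_WRITE flush; 4 bytes per dword.
	 */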
1682 	total_size =
1683 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1684 	total_size +=
1685 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1686 	total_size +=
1687 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1688 	total_size = ALIGN(total_size, 256);
1689 	vgpr_offset = total_size;
1690 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1691 	sgpr_offset = total_size;
1692 	total_size += sizeof(sgpr_init_compute_shader);
1693 
1694 	/* allocate an indirect buffer to put the commands in */
1695 	memset(&ib, 0, sizeof(ib));
1696 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1697 	if (r) {
1698 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1699 		return r;
1700 	}
1701 
1702 	/* load the compute shaders */
1703 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1704 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1705 
1706 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1707 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1708 
1709 	/* init the ib length to 0 */
1710 	ib.length_dw = 0;
1711 
1712 	/* VGPR */
1713 	/* write the register state for the compute dispatch */
1714 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1715 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1716 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1717 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1718 	}
1719 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
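	/* COMPUTE_PGM_LO/HI take a 256-byte-aligned address, hence the >> 8 */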
1720 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1721 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1722 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1723 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1724 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1725 
1726 	/* write dispatch packet */
1727 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1728 	ib.ptr[ib.length_dw++] = 8; /* x */
1729 	ib.ptr[ib.length_dw++] = 1; /* y */
1730 	ib.ptr[ib.length_dw++] = 1; /* z */
1731 	ib.ptr[ib.length_dw++] =
1732 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1733 
1734 	/* write CS partial flush packet */
1735 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1736 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1737 
1738 	/* SGPR1 */
1739 	/* write the register state for the compute dispatch */
1740 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1741 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1742 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1743 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1744 	}
1745 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1746 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1747 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1748 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1749 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1750 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1751 
1752 	/* write dispatch packet */
1753 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1754 	ib.ptr[ib.length_dw++] = 8; /* x */
1755 	ib.ptr[ib.length_dw++] = 1; /* y */
1756 	ib.ptr[ib.length_dw++] = 1; /* z */
1757 	ib.ptr[ib.length_dw++] =
1758 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1759 
1760 	/* write CS partial flush packet */
1761 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1762 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1763 
1764 	/* SGPR2 */
1765 	/* write the register state for the compute dispatch */
1766 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1767 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1768 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1769 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1770 	}
1771 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1772 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1773 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1774 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1775 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1776 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1777 
1778 	/* write dispatch packet */
1779 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1780 	ib.ptr[ib.length_dw++] = 8; /* x */
1781 	ib.ptr[ib.length_dw++] = 1; /* y */
1782 	ib.ptr[ib.length_dw++] = 1; /* z */
1783 	ib.ptr[ib.length_dw++] =
1784 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1785 
1786 	/* write CS partial flush packet */
1787 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1788 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1789 
1790 	/* schedule the ib on the ring */
1791 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1792 	if (r) {
1793 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1794 		goto fail;
1795 	}
1796 
1797 	/* wait for the GPU to finish processing the IB */
1798 	r = dma_fence_wait(f, false);
1799 	if (r) {
1800 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1801 		goto fail;
1802 	}
1803 
1804 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1805 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1806 	WREG32(mmGB_EDC_MODE, tmp);
1807 
1808 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1809 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1810 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1811 
1813 	/* read back registers to clear the counters */
1814 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1815 		RREG32(sec_ded_counter_registers[i]);
1816 
1817 fail:
1818 	amdgpu_ib_free(adev, &ib, NULL);
1819 	dma_fence_put(f);
1820 
1821 	return r;
1822 }
1823 
1824 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1825 {
1826 	u32 gb_addr_config;
1827 	u32 mc_shared_chmap, mc_arb_ramcfg;
1828 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1829 	u32 tmp;
1830 	int ret;
1831 
1832 	switch (adev->asic_type) {
1833 	case CHIP_TOPAZ:
1834 		adev->gfx.config.max_shader_engines = 1;
1835 		adev->gfx.config.max_tile_pipes = 2;
1836 		adev->gfx.config.max_cu_per_sh = 6;
1837 		adev->gfx.config.max_sh_per_se = 1;
1838 		adev->gfx.config.max_backends_per_se = 2;
1839 		adev->gfx.config.max_texture_channel_caches = 2;
1840 		adev->gfx.config.max_gprs = 256;
1841 		adev->gfx.config.max_gs_threads = 32;
1842 		adev->gfx.config.max_hw_contexts = 8;
1843 
1844 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1845 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1846 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1847 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1848 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1849 		break;
1850 	case CHIP_FIJI:
1851 		adev->gfx.config.max_shader_engines = 4;
1852 		adev->gfx.config.max_tile_pipes = 16;
1853 		adev->gfx.config.max_cu_per_sh = 16;
1854 		adev->gfx.config.max_sh_per_se = 1;
1855 		adev->gfx.config.max_backends_per_se = 4;
1856 		adev->gfx.config.max_texture_channel_caches = 16;
1857 		adev->gfx.config.max_gprs = 256;
1858 		adev->gfx.config.max_gs_threads = 32;
1859 		adev->gfx.config.max_hw_contexts = 8;
1860 
1861 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1862 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1863 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1864 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1865 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1866 		break;
1867 	case CHIP_POLARIS11:
1868 	case CHIP_POLARIS12:
1869 		ret = amdgpu_atombios_get_gfx_info(adev);
1870 		if (ret)
1871 			return ret;
1872 		adev->gfx.config.max_gprs = 256;
1873 		adev->gfx.config.max_gs_threads = 32;
1874 		adev->gfx.config.max_hw_contexts = 8;
1875 
1876 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1877 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1878 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1879 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1880 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1881 		break;
1882 	case CHIP_POLARIS10:
1883 		ret = amdgpu_atombios_get_gfx_info(adev);
1884 		if (ret)
1885 			return ret;
1886 		adev->gfx.config.max_gprs = 256;
1887 		adev->gfx.config.max_gs_threads = 32;
1888 		adev->gfx.config.max_hw_contexts = 8;
1889 
1890 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1891 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1892 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1893 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1894 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1895 		break;
1896 	case CHIP_TONGA:
1897 		adev->gfx.config.max_shader_engines = 4;
1898 		adev->gfx.config.max_tile_pipes = 8;
1899 		adev->gfx.config.max_cu_per_sh = 8;
1900 		adev->gfx.config.max_sh_per_se = 1;
1901 		adev->gfx.config.max_backends_per_se = 2;
1902 		adev->gfx.config.max_texture_channel_caches = 8;
1903 		adev->gfx.config.max_gprs = 256;
1904 		adev->gfx.config.max_gs_threads = 32;
1905 		adev->gfx.config.max_hw_contexts = 8;
1906 
1907 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1908 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1909 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1910 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1911 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1912 		break;
1913 	case CHIP_CARRIZO:
1914 		adev->gfx.config.max_shader_engines = 1;
1915 		adev->gfx.config.max_tile_pipes = 2;
1916 		adev->gfx.config.max_sh_per_se = 1;
1917 		adev->gfx.config.max_backends_per_se = 2;
1918 
1919 		switch (adev->pdev->revision) {
1920 		case 0xc4:
1921 		case 0x84:
1922 		case 0xc8:
1923 		case 0xcc:
1924 		case 0xe1:
1925 		case 0xe3:
1926 			/* B10 */
1927 			adev->gfx.config.max_cu_per_sh = 8;
1928 			break;
1929 		case 0xc5:
1930 		case 0x81:
1931 		case 0x85:
1932 		case 0xc9:
1933 		case 0xcd:
1934 		case 0xe2:
1935 		case 0xe4:
1936 			/* B8 */
1937 			adev->gfx.config.max_cu_per_sh = 6;
1938 			break;
1939 		case 0xc6:
1940 		case 0xca:
1941 		case 0xce:
1942 		case 0x88:
1943 		case 0xe6:
1944 			/* B6 */
1945 			adev->gfx.config.max_cu_per_sh = 6;
1946 			break;
1947 		case 0xc7:
1948 		case 0x87:
1949 		case 0xcb:
1950 		case 0xe5:
1951 		case 0x89:
1952 		default:
1953 			/* B4 */
1954 			adev->gfx.config.max_cu_per_sh = 4;
1955 			break;
1956 		}
1957 
1958 		adev->gfx.config.max_texture_channel_caches = 2;
1959 		adev->gfx.config.max_gprs = 256;
1960 		adev->gfx.config.max_gs_threads = 32;
1961 		adev->gfx.config.max_hw_contexts = 8;
1962 
1963 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1964 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1965 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1966 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1967 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1968 		break;
1969 	case CHIP_STONEY:
1970 		adev->gfx.config.max_shader_engines = 1;
1971 		adev->gfx.config.max_tile_pipes = 2;
1972 		adev->gfx.config.max_sh_per_se = 1;
1973 		adev->gfx.config.max_backends_per_se = 1;
1974 
1975 		switch (adev->pdev->revision) {
1976 		case 0x80:
1977 		case 0x81:
1978 		case 0xc0:
1979 		case 0xc1:
1980 		case 0xc2:
1981 		case 0xc4:
1982 		case 0xc8:
1983 		case 0xc9:
1984 		case 0xd6:
1985 		case 0xda:
1986 		case 0xe9:
1987 		case 0xea:
1988 			adev->gfx.config.max_cu_per_sh = 3;
1989 			break;
1990 		case 0x83:
1991 		case 0xd0:
1992 		case 0xd1:
1993 		case 0xd2:
1994 		case 0xd4:
1995 		case 0xdb:
1996 		case 0xe1:
1997 		case 0xe2:
1998 		default:
1999 			adev->gfx.config.max_cu_per_sh = 2;
2000 			break;
2001 		}
2002 
2003 		adev->gfx.config.max_texture_channel_caches = 2;
2004 		adev->gfx.config.max_gprs = 256;
2005 		adev->gfx.config.max_gs_threads = 16;
2006 		adev->gfx.config.max_hw_contexts = 8;
2007 
2008 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2009 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2010 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2011 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2012 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
2013 		break;
2014 	default:
2015 		adev->gfx.config.max_shader_engines = 2;
2016 		adev->gfx.config.max_tile_pipes = 4;
2017 		adev->gfx.config.max_cu_per_sh = 2;
2018 		adev->gfx.config.max_sh_per_se = 1;
2019 		adev->gfx.config.max_backends_per_se = 2;
2020 		adev->gfx.config.max_texture_channel_caches = 4;
2021 		adev->gfx.config.max_gprs = 256;
2022 		adev->gfx.config.max_gs_threads = 32;
2023 		adev->gfx.config.max_hw_contexts = 8;
2024 
2025 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2026 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2027 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2028 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2029 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2030 		break;
2031 	}
2032 
2033 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2034 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2035 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2036 
2037 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2038 	adev->gfx.config.mem_max_burst_length_bytes = 256;
2039 	if (adev->flags & AMD_IS_APU) {
2040 		/* Get memory bank mapping mode. */
2041 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2042 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2043 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2044 
2045 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2046 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2047 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2048 
2049 		/* Validate settings in case only one DIMM is installed. */
2050 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2051 			dimm00_addr_map = 0;
2052 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2053 			dimm01_addr_map = 0;
2054 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2055 			dimm10_addr_map = 0;
2056 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2057 			dimm11_addr_map = 0;
2058 
2059 		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2060 		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
2061 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2062 			adev->gfx.config.mem_row_size_in_kb = 2;
2063 		else
2064 			adev->gfx.config.mem_row_size_in_kb = 1;
2065 	} else {
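		/* row size in KB = (4 bytes * 2^(8 + NOOFCOLS)) / 1024, capped at 4KB */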
2066 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2067 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2068 		if (adev->gfx.config.mem_row_size_in_kb > 4)
2069 			adev->gfx.config.mem_row_size_in_kb = 4;
2070 	}
2071 
2072 	adev->gfx.config.shader_engine_tile_size = 32;
2073 	adev->gfx.config.num_gpus = 1;
2074 	adev->gfx.config.multi_gpu_tile_size = 64;
2075 
2076 	/* fix up row size */
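	/* GB_ADDR_CONFIG.ROW_SIZE encodes 1KB/2KB/4KB rows as 0/1/2 */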
2077 	switch (adev->gfx.config.mem_row_size_in_kb) {
2078 	case 1:
2079 	default:
2080 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2081 		break;
2082 	case 2:
2083 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2084 		break;
2085 	case 4:
2086 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2087 		break;
2088 	}
2089 	adev->gfx.config.gb_addr_config = gb_addr_config;
2090 
2091 	return 0;
2092 }
2093 
2094 static int gfx_v8_0_sw_init(void *handle)
2095 {
2096 	int i, r;
2097 	struct amdgpu_ring *ring;
2098 	struct amdgpu_kiq *kiq;
2099 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2100 
2101 	/* KIQ event */
2102 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2103 	if (r)
2104 		return r;
2105 
2106 	/* EOP Event */
2107 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2108 	if (r)
2109 		return r;
2110 
2111 	/* Privileged reg */
2112 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2113 			      &adev->gfx.priv_reg_irq);
2114 	if (r)
2115 		return r;
2116 
2117 	/* Privileged inst */
2118 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2119 			      &adev->gfx.priv_inst_irq);
2120 	if (r)
2121 		return r;
2122 
2123 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2124 
2125 	gfx_v8_0_scratch_init(adev);
2126 
2127 	r = gfx_v8_0_init_microcode(adev);
2128 	if (r) {
2129 		DRM_ERROR("Failed to load gfx firmware!\n");
2130 		return r;
2131 	}
2132 
2133 	r = gfx_v8_0_rlc_init(adev);
2134 	if (r) {
2135 		DRM_ERROR("Failed to init rlc BOs!\n");
2136 		return r;
2137 	}
2138 
2139 	r = gfx_v8_0_mec_init(adev);
2140 	if (r) {
2141 		DRM_ERROR("Failed to init MEC BOs!\n");
2142 		return r;
2143 	}
2144 
2145 	/* set up the gfx ring */
2146 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2147 		ring = &adev->gfx.gfx_ring[i];
2148 		ring->ring_obj = NULL;
2149 		sprintf(ring->name, "gfx");
2150 		/* no gfx doorbells on iceland */
2151 		if (adev->asic_type != CHIP_TOPAZ) {
2152 			ring->use_doorbell = true;
2153 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2154 		}
2155 
2156 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2157 				     AMDGPU_CP_IRQ_GFX_EOP);
2158 		if (r)
2159 			return r;
2160 	}
2161 
2162 	/* set up the compute queues */
2163 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2164 		unsigned irq_type;
2165 
2166 		/* max 32 queues per MEC */
2167 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2168 			DRM_ERROR("Too many (%d) compute rings!\n", i);
2169 			break;
2170 		}
2171 		ring = &adev->gfx.compute_ring[i];
2172 		ring->ring_obj = NULL;
2173 		ring->use_doorbell = true;
2174 		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2175 		ring->me = 1; /* first MEC */
2176 		ring->pipe = i / 8;
2177 		ring->queue = i % 8;
2178 		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
2179 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2180 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2181 		/* type-2 packets are deprecated on MEC, use type-3 instead */
2182 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2183 				     irq_type);
2184 		if (r)
2185 			return r;
2186 	}
2187 
2188 	r = gfx_v8_0_kiq_init(adev);
2189 	if (r) {
2190 		DRM_ERROR("Failed to init KIQ BOs!\n");
2191 		return r;
2192 	}
2193 
2194 	kiq = &adev->gfx.kiq;
2195 	r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2196 	if (r)
2197 		return r;
2198 
2199 	/* create MQDs for all compute queues, as well as the KIQ for the SR-IOV case */
2200 	r = gfx_v8_0_compute_mqd_sw_init(adev);
2201 	if (r)
2202 		return r;
2203 
2204 	/* reserve GDS, GWS and OA resources for gfx */
2205 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2206 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2207 				    &adev->gds.gds_gfx_bo, NULL, NULL);
2208 	if (r)
2209 		return r;
2210 
2211 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2212 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2213 				    &adev->gds.gws_gfx_bo, NULL, NULL);
2214 	if (r)
2215 		return r;
2216 
2217 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2218 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2219 				    &adev->gds.oa_gfx_bo, NULL, NULL);
2220 	if (r)
2221 		return r;
2222 
2223 	adev->gfx.ce_ram_size = 0x8000;
2224 
2225 	r = gfx_v8_0_gpu_early_init(adev);
2226 	if (r)
2227 		return r;
2228 
2229 	return 0;
2230 }
2231 
2232 static int gfx_v8_0_sw_fini(void *handle)
2233 {
2234 	int i;
2235 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2236 
2237 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2238 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2239 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2240 
2241 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2242 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2243 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2244 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2245 
2246 	gfx_v8_0_compute_mqd_sw_fini(adev);
2247 	gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2248 	gfx_v8_0_kiq_fini(adev);
2249 
2250 	gfx_v8_0_mec_fini(adev);
2251 	gfx_v8_0_rlc_fini(adev);
2252 	gfx_v8_0_free_microcode(adev);
2253 
2254 	return 0;
2255 }
2256 
2257 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2258 {
2259 	uint32_t *modearray, *mod2array;
2260 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2261 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2262 	u32 reg_offset;
2263 
2264 	modearray = adev->gfx.config.tile_mode_array;
2265 	mod2array = adev->gfx.config.macrotile_mode_array;
2266 
2267 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2268 		modearray[reg_offset] = 0;
2269 
2270 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2271 		mod2array[reg_offset] = 0;
2272 
2273 	switch (adev->asic_type) {
2274 	case CHIP_TOPAZ:
2275 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2276 				PIPE_CONFIG(ADDR_SURF_P2) |
2277 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2278 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2279 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280 				PIPE_CONFIG(ADDR_SURF_P2) |
2281 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2282 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2283 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2284 				PIPE_CONFIG(ADDR_SURF_P2) |
2285 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2286 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2287 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2288 				PIPE_CONFIG(ADDR_SURF_P2) |
2289 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2290 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2291 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292 				PIPE_CONFIG(ADDR_SURF_P2) |
2293 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2294 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2295 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2296 				PIPE_CONFIG(ADDR_SURF_P2) |
2297 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2298 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2299 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300 				PIPE_CONFIG(ADDR_SURF_P2) |
2301 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2302 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2303 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2304 				PIPE_CONFIG(ADDR_SURF_P2));
2305 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2306 				PIPE_CONFIG(ADDR_SURF_P2) |
2307 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2308 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2309 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310 				 PIPE_CONFIG(ADDR_SURF_P2) |
2311 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2312 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2313 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2314 				 PIPE_CONFIG(ADDR_SURF_P2) |
2315 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2316 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2317 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2318 				 PIPE_CONFIG(ADDR_SURF_P2) |
2319 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2320 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 				 PIPE_CONFIG(ADDR_SURF_P2) |
2323 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2324 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2326 				 PIPE_CONFIG(ADDR_SURF_P2) |
2327 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2328 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330 				 PIPE_CONFIG(ADDR_SURF_P2) |
2331 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2332 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2333 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2334 				 PIPE_CONFIG(ADDR_SURF_P2) |
2335 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2336 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2337 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2338 				 PIPE_CONFIG(ADDR_SURF_P2) |
2339 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2340 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2341 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2342 				 PIPE_CONFIG(ADDR_SURF_P2) |
2343 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2344 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2345 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2346 				 PIPE_CONFIG(ADDR_SURF_P2) |
2347 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2348 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2349 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2350 				 PIPE_CONFIG(ADDR_SURF_P2) |
2351 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2352 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2353 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2354 				 PIPE_CONFIG(ADDR_SURF_P2) |
2355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2357 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2358 				 PIPE_CONFIG(ADDR_SURF_P2) |
2359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2361 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2362 				 PIPE_CONFIG(ADDR_SURF_P2) |
2363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2365 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 				 PIPE_CONFIG(ADDR_SURF_P2) |
2367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 				 PIPE_CONFIG(ADDR_SURF_P2) |
2371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2374 				 PIPE_CONFIG(ADDR_SURF_P2) |
2375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2377 
2378 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2379 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2380 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381 				NUM_BANKS(ADDR_SURF_8_BANK));
2382 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2383 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2384 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385 				NUM_BANKS(ADDR_SURF_8_BANK));
2386 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2387 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2388 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2389 				NUM_BANKS(ADDR_SURF_8_BANK));
2390 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2391 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2392 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2393 				NUM_BANKS(ADDR_SURF_8_BANK));
2394 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2396 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2397 				NUM_BANKS(ADDR_SURF_8_BANK));
2398 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2400 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2401 				NUM_BANKS(ADDR_SURF_8_BANK));
2402 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2403 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2404 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2405 				NUM_BANKS(ADDR_SURF_8_BANK));
2406 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2407 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2408 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2409 				NUM_BANKS(ADDR_SURF_16_BANK));
2410 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2411 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2412 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2413 				NUM_BANKS(ADDR_SURF_16_BANK));
2414 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2415 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2416 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2417 				 NUM_BANKS(ADDR_SURF_16_BANK));
2418 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2419 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2420 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2421 				 NUM_BANKS(ADDR_SURF_16_BANK));
2422 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2424 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2425 				 NUM_BANKS(ADDR_SURF_16_BANK));
2426 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2428 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2429 				 NUM_BANKS(ADDR_SURF_16_BANK));
2430 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2432 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2433 				 NUM_BANKS(ADDR_SURF_8_BANK));
2434 
2435 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2436 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2437 			    reg_offset != 23)
2438 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2439 
2440 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2441 			if (reg_offset != 7)
2442 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2443 
2444 		break;
2445 	case CHIP_FIJI:
2446 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2449 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2453 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2457 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2461 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2464 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2465 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2466 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2467 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2469 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2471 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2472 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2473 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2474 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2475 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2476 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2477 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2478 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2479 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2480 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2483 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2487 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2489 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2491 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2492 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2494 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2495 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2497 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2498 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2500 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2502 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2504 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2505 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2506 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2507 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2509 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2510 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2511 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2512 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2513 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2514 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2515 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2516 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2517 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2518 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2519 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2521 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2522 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2523 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2524 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2525 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2526 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2527 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2528 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2529 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2530 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2531 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2532 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2533 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2534 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2535 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2536 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2537 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2538 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2539 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2540 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2541 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2542 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2543 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2544 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2545 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2546 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2547 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2548 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2549 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2550 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2551 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2552 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2553 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2554 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2555 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2558 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2559 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2560 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2561 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2562 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2563 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2564 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2566 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2567 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2568 
2569 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2571 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2572 				NUM_BANKS(ADDR_SURF_8_BANK));
2573 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2575 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2576 				NUM_BANKS(ADDR_SURF_8_BANK));
2577 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2579 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2580 				NUM_BANKS(ADDR_SURF_8_BANK));
2581 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2583 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584 				NUM_BANKS(ADDR_SURF_8_BANK));
2585 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2587 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2588 				NUM_BANKS(ADDR_SURF_8_BANK));
2589 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2591 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592 				NUM_BANKS(ADDR_SURF_8_BANK));
2593 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2595 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2596 				NUM_BANKS(ADDR_SURF_8_BANK));
2597 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2599 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2600 				NUM_BANKS(ADDR_SURF_8_BANK));
2601 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2603 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2604 				NUM_BANKS(ADDR_SURF_8_BANK));
2605 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2607 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2608 				 NUM_BANKS(ADDR_SURF_8_BANK));
2609 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2611 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2612 				 NUM_BANKS(ADDR_SURF_8_BANK));
2613 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2615 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2616 				 NUM_BANKS(ADDR_SURF_8_BANK));
2617 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2619 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2620 				 NUM_BANKS(ADDR_SURF_8_BANK));
2621 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2622 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2623 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2624 				 NUM_BANKS(ADDR_SURF_4_BANK));
2625 
2626 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2627 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2628 
2629 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2630 			if (reg_offset != 7)
2631 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2632 
2633 		break;
2634 	case CHIP_TONGA:
2635 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2638 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2639 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2642 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2643 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2646 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2647 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2648 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2650 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2651 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2653 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2654 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2655 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2656 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2657 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2658 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2659 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2661 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2662 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2663 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2664 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2665 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2666 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2668 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2669 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2672 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2673 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2676 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2678 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2680 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2681 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2684 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2685 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2686 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2687 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2688 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2689 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2692 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2694 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2695 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2696 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2699 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2700 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2701 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2702 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2704 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2705 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2706 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2707 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2708 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2709 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2710 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2711 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2712 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2713 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2714 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2715 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2716 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2717 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2718 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2719 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2720 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2721 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2722 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2723 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2724 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2725 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2726 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2728 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2729 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2730 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2731 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2732 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2733 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2734 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2735 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2736 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2737 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2738 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2739 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2740 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2741 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2742 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2743 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2745 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2747 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2749 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2750 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2751 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2752 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2753 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2754 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2756 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2757 
2758 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2761 				NUM_BANKS(ADDR_SURF_16_BANK));
2762 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2765 				NUM_BANKS(ADDR_SURF_16_BANK));
2766 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2768 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2769 				NUM_BANKS(ADDR_SURF_16_BANK));
2770 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2772 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2773 				NUM_BANKS(ADDR_SURF_16_BANK));
2774 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2776 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2777 				NUM_BANKS(ADDR_SURF_16_BANK));
2778 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2780 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2781 				NUM_BANKS(ADDR_SURF_16_BANK));
2782 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2783 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2784 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2785 				NUM_BANKS(ADDR_SURF_16_BANK));
2786 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2788 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2789 				NUM_BANKS(ADDR_SURF_16_BANK));
2790 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2792 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2793 				NUM_BANKS(ADDR_SURF_16_BANK));
2794 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2796 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2797 				 NUM_BANKS(ADDR_SURF_16_BANK));
2798 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2801 				 NUM_BANKS(ADDR_SURF_16_BANK));
2802 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2804 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2805 				 NUM_BANKS(ADDR_SURF_8_BANK));
2806 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2808 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2809 				 NUM_BANKS(ADDR_SURF_4_BANK));
2810 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2812 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2813 				 NUM_BANKS(ADDR_SURF_4_BANK));
2814 
2815 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2816 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2817 
2818 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2819 			if (reg_offset != 7)
2820 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2821 
2822 		break;
2823 	case CHIP_POLARIS11:
2824 	case CHIP_POLARIS12:
2825 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2828 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2832 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2833 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2836 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2837 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2840 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2841 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2844 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2845 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2846 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2848 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2849 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2850 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2851 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2852 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2853 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2854 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2856 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2857 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2858 				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2859 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2862 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2866 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2868 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2870 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2871 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2874 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2875 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2876 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2878 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2881 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2882 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2883 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2884 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2886 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2890 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2891 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2892 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2894 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2895 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2896 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2898 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2900 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2902 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2903 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2904 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2906 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2907 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2908 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2910 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2911 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2912 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2914 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2915 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2916 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2918 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2919 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2920 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2921 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2922 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2923 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2924 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2925 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2926 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2927 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2928 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2929 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2930 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2931 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2932 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2933 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2934 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2935 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2936 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2937 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2938 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2940 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2941 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2942 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2943 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2944 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2945 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2947 
2948 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2950 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2951 				NUM_BANKS(ADDR_SURF_16_BANK));
2952 
2953 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2954 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2955 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2956 				NUM_BANKS(ADDR_SURF_16_BANK));
2957 
2958 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2959 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2960 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2961 				NUM_BANKS(ADDR_SURF_16_BANK));
2962 
2963 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2965 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966 				NUM_BANKS(ADDR_SURF_16_BANK));
2967 
2968 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2970 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2971 				NUM_BANKS(ADDR_SURF_16_BANK));
2972 
2973 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2975 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2976 				NUM_BANKS(ADDR_SURF_16_BANK));
2977 
2978 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2980 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2981 				NUM_BANKS(ADDR_SURF_16_BANK));
2982 
2983 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986 				NUM_BANKS(ADDR_SURF_16_BANK));
2987 
2988 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 				NUM_BANKS(ADDR_SURF_16_BANK));
2992 
2993 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2994 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2995 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2996 				NUM_BANKS(ADDR_SURF_16_BANK));
2997 
2998 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2999 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3000 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3001 				NUM_BANKS(ADDR_SURF_16_BANK));
3002 
3003 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3005 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3006 				NUM_BANKS(ADDR_SURF_16_BANK));
3007 
3008 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011 				NUM_BANKS(ADDR_SURF_8_BANK));
3012 
3013 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3014 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3015 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3016 				NUM_BANKS(ADDR_SURF_4_BANK));
3017 
3018 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3019 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3020 
3021 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3022 			if (reg_offset != 7)
3023 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3024 
3025 		break;
3026 	case CHIP_POLARIS10:
3027 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3029 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3030 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3031 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3032 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3034 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3035 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3036 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3038 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3039 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3040 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3041 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3042 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3043 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3044 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3045 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3046 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3047 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3048 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3049 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3050 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3051 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3052 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3053 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3054 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3055 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3056 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3057 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3058 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3059 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3060 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3061 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3062 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3064 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3065 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3068 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3070 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3072 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3073 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3074 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3075 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3076 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3077 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3078 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3079 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3080 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3081 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3084 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3085 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3086 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3087 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3088 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3089 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3090 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3091 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3093 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3094 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3095 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3096 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3097 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3098 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3100 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3102 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3103 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3104 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3105 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3106 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3108 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3110 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3111 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3114 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3115 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3116 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3118 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3119 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3120 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3121 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3122 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3123 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3124 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3125 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3126 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3127 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3128 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3129 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3130 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3131 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3132 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3133 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3135 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3136 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3138 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3139 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3140 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3142 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3143 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3144 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3145 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3146 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3147 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3148 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3149 
3150 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3152 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3153 				NUM_BANKS(ADDR_SURF_16_BANK));
3154 
3155 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3157 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3158 				NUM_BANKS(ADDR_SURF_16_BANK));
3159 
3160 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3162 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3163 				NUM_BANKS(ADDR_SURF_16_BANK));
3164 
3165 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3166 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3167 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3168 				NUM_BANKS(ADDR_SURF_16_BANK));
3169 
3170 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3172 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173 				NUM_BANKS(ADDR_SURF_16_BANK));
3174 
3175 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3178 				NUM_BANKS(ADDR_SURF_16_BANK));
3179 
3180 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3183 				NUM_BANKS(ADDR_SURF_16_BANK));
3184 
3185 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3186 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3187 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3188 				NUM_BANKS(ADDR_SURF_16_BANK));
3189 
3190 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3191 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3192 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3193 				NUM_BANKS(ADDR_SURF_16_BANK));
3194 
3195 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3196 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3197 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3198 				NUM_BANKS(ADDR_SURF_16_BANK));
3199 
3200 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203 				NUM_BANKS(ADDR_SURF_16_BANK));
3204 
3205 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3207 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3208 				NUM_BANKS(ADDR_SURF_8_BANK));
3209 
3210 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3211 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3212 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3213 				NUM_BANKS(ADDR_SURF_4_BANK));
3214 
3215 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3217 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3218 				NUM_BANKS(ADDR_SURF_4_BANK));
3219 
3220 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3221 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3222 
3223 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3224 			if (reg_offset != 7)
3225 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3226 
3227 		break;
3228 	case CHIP_STONEY:
3229 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230 				PIPE_CONFIG(ADDR_SURF_P2) |
3231 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3232 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3233 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234 				PIPE_CONFIG(ADDR_SURF_P2) |
3235 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3236 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3237 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3238 				PIPE_CONFIG(ADDR_SURF_P2) |
3239 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3240 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3241 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3242 				PIPE_CONFIG(ADDR_SURF_P2) |
3243 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3244 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3245 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246 				PIPE_CONFIG(ADDR_SURF_P2) |
3247 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3248 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3249 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250 				PIPE_CONFIG(ADDR_SURF_P2) |
3251 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3252 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3253 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3254 				PIPE_CONFIG(ADDR_SURF_P2) |
3255 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3256 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3257 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3258 				PIPE_CONFIG(ADDR_SURF_P2));
3259 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3260 				PIPE_CONFIG(ADDR_SURF_P2) |
3261 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3262 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3263 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3264 				 PIPE_CONFIG(ADDR_SURF_P2) |
3265 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3266 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3268 				 PIPE_CONFIG(ADDR_SURF_P2) |
3269 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3270 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3271 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3272 				 PIPE_CONFIG(ADDR_SURF_P2) |
3273 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3274 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3275 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3276 				 PIPE_CONFIG(ADDR_SURF_P2) |
3277 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3278 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3279 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3280 				 PIPE_CONFIG(ADDR_SURF_P2) |
3281 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3282 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3283 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284 				 PIPE_CONFIG(ADDR_SURF_P2) |
3285 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3286 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3287 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3288 				 PIPE_CONFIG(ADDR_SURF_P2) |
3289 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3290 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3291 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3292 				 PIPE_CONFIG(ADDR_SURF_P2) |
3293 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3294 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3295 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3296 				 PIPE_CONFIG(ADDR_SURF_P2) |
3297 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3298 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3299 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3300 				 PIPE_CONFIG(ADDR_SURF_P2) |
3301 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3302 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3304 				 PIPE_CONFIG(ADDR_SURF_P2) |
3305 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3306 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3307 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3308 				 PIPE_CONFIG(ADDR_SURF_P2) |
3309 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3310 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3311 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3312 				 PIPE_CONFIG(ADDR_SURF_P2) |
3313 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3314 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3315 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3316 				 PIPE_CONFIG(ADDR_SURF_P2) |
3317 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3318 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3319 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3320 				 PIPE_CONFIG(ADDR_SURF_P2) |
3321 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3322 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3323 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3324 				 PIPE_CONFIG(ADDR_SURF_P2) |
3325 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3326 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3327 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3328 				 PIPE_CONFIG(ADDR_SURF_P2) |
3329 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3330 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3331 
3332 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3333 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3334 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3335 				NUM_BANKS(ADDR_SURF_8_BANK));
3336 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3337 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3338 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3339 				NUM_BANKS(ADDR_SURF_8_BANK));
3340 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343 				NUM_BANKS(ADDR_SURF_8_BANK));
3344 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3346 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3347 				NUM_BANKS(ADDR_SURF_8_BANK));
3348 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3350 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3351 				NUM_BANKS(ADDR_SURF_8_BANK));
3352 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3355 				NUM_BANKS(ADDR_SURF_8_BANK));
3356 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3357 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3358 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3359 				NUM_BANKS(ADDR_SURF_8_BANK));
3360 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3361 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3362 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3363 				NUM_BANKS(ADDR_SURF_16_BANK));
3364 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3365 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3366 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 				NUM_BANKS(ADDR_SURF_16_BANK));
3368 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3369 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3370 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371 				 NUM_BANKS(ADDR_SURF_16_BANK));
3372 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3373 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3374 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375 				 NUM_BANKS(ADDR_SURF_16_BANK));
3376 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3378 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379 				 NUM_BANKS(ADDR_SURF_16_BANK));
3380 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3382 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3383 				 NUM_BANKS(ADDR_SURF_16_BANK));
3384 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3387 				 NUM_BANKS(ADDR_SURF_8_BANK));
3388 
3389 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3390 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3391 			    reg_offset != 23)
3392 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3393 
3394 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3395 			if (reg_offset != 7)
3396 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3397 
3398 		break;
3399 	default:
3400 		dev_warn(adev->dev,
3401 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3402 			 adev->asic_type);
3403 
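		/* fall through */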
3404 	case CHIP_CARRIZO:
3405 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3406 				PIPE_CONFIG(ADDR_SURF_P2) |
3407 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3408 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3409 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3410 				PIPE_CONFIG(ADDR_SURF_P2) |
3411 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3412 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3413 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3414 				PIPE_CONFIG(ADDR_SURF_P2) |
3415 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3416 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3417 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3418 				PIPE_CONFIG(ADDR_SURF_P2) |
3419 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3420 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3421 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3422 				PIPE_CONFIG(ADDR_SURF_P2) |
3423 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3424 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3425 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3426 				PIPE_CONFIG(ADDR_SURF_P2) |
3427 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3428 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3429 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3430 				PIPE_CONFIG(ADDR_SURF_P2) |
3431 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3432 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3433 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3434 				PIPE_CONFIG(ADDR_SURF_P2));
3435 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3436 				PIPE_CONFIG(ADDR_SURF_P2) |
3437 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3438 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3439 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3440 				 PIPE_CONFIG(ADDR_SURF_P2) |
3441 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3442 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3443 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3444 				 PIPE_CONFIG(ADDR_SURF_P2) |
3445 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3446 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3447 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3448 				 PIPE_CONFIG(ADDR_SURF_P2) |
3449 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3450 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3451 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3452 				 PIPE_CONFIG(ADDR_SURF_P2) |
3453 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3454 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3455 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3456 				 PIPE_CONFIG(ADDR_SURF_P2) |
3457 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3458 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3459 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3460 				 PIPE_CONFIG(ADDR_SURF_P2) |
3461 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3462 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3463 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3464 				 PIPE_CONFIG(ADDR_SURF_P2) |
3465 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3466 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3467 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3468 				 PIPE_CONFIG(ADDR_SURF_P2) |
3469 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3470 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3471 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3472 				 PIPE_CONFIG(ADDR_SURF_P2) |
3473 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3474 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3475 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3476 				 PIPE_CONFIG(ADDR_SURF_P2) |
3477 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3478 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3479 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3480 				 PIPE_CONFIG(ADDR_SURF_P2) |
3481 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3482 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3483 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3484 				 PIPE_CONFIG(ADDR_SURF_P2) |
3485 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3486 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3487 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3488 				 PIPE_CONFIG(ADDR_SURF_P2) |
3489 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3490 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3491 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3492 				 PIPE_CONFIG(ADDR_SURF_P2) |
3493 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3494 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3495 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3496 				 PIPE_CONFIG(ADDR_SURF_P2) |
3497 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3498 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3499 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3500 				 PIPE_CONFIG(ADDR_SURF_P2) |
3501 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3502 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3503 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3504 				 PIPE_CONFIG(ADDR_SURF_P2) |
3505 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3506 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3507 
3508 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3509 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3510 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3511 				NUM_BANKS(ADDR_SURF_8_BANK));
3512 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3513 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3514 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3515 				NUM_BANKS(ADDR_SURF_8_BANK));
3516 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3517 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3518 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3519 				NUM_BANKS(ADDR_SURF_8_BANK));
3520 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3521 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3522 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3523 				NUM_BANKS(ADDR_SURF_8_BANK));
3524 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3525 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3526 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3527 				NUM_BANKS(ADDR_SURF_8_BANK));
3528 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3529 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3530 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3531 				NUM_BANKS(ADDR_SURF_8_BANK));
3532 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3533 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3534 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3535 				NUM_BANKS(ADDR_SURF_8_BANK));
3536 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3537 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3538 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3539 				NUM_BANKS(ADDR_SURF_16_BANK));
3540 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3541 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3542 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3543 				NUM_BANKS(ADDR_SURF_16_BANK));
3544 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3545 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3546 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3547 				 NUM_BANKS(ADDR_SURF_16_BANK));
3548 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3549 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3550 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3551 				 NUM_BANKS(ADDR_SURF_16_BANK));
3552 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3553 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3554 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3555 				 NUM_BANKS(ADDR_SURF_16_BANK));
3556 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3557 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3558 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3559 				 NUM_BANKS(ADDR_SURF_16_BANK));
3560 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3561 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3562 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3563 				 NUM_BANKS(ADDR_SURF_8_BANK));
3564 
3565 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3566 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3567 			    reg_offset != 23)
3568 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3569 
3570 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3571 			if (reg_offset != 7)
3572 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3573 
3574 		break;
3575 	}
3576 }
3577 
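/*
 * Steer subsequent register accesses through GRBM_GFX_INDEX to one
 * shader engine (SE), shader array (SH) and instance, or broadcast
 * them: a value of 0xffffffff for any argument selects the broadcast
 * bit for that field.
 */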
3578 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3579 				  u32 se_num, u32 sh_num, u32 instance)
3580 {
3581 	u32 data;
3582 
3583 	if (instance == 0xffffffff)
3584 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3585 	else
3586 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3587 
3588 	if (se_num == 0xffffffff)
3589 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3590 	else
3591 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3592 
3593 	if (sh_num == 0xffffffff)
3594 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3595 	else
3596 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3597 
3598 	WREG32(mmGRBM_GFX_INDEX, data);
3599 }
3600 
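/*
 * Build a mask of the low bit_width bits.  The shift is done on a
 * 64-bit value so that bit_width == 32 yields 0xffffffff instead of
 * an undefined 32-bit shift by 32.
 */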
3601 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3602 {
3603 	return (u32)((1ULL << bit_width) - 1);
3604 }
3605 
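/*
 * Return the bitmap of render backends (RBs) active on the currently
 * selected SE/SH: OR the hardware and user disable bitmaps, invert,
 * and trim the result to the per-SH backend width.
 */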
3606 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3607 {
3608 	u32 data, mask;
3609 
3610 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3611 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3612 
3613 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3614 
3615 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3616 				       adev->gfx.config.max_sh_per_se);
3617 
3618 	return (~data) & mask;
3619 }
3620 
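/*
 * Provide the per-ASIC PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1
 * values for a fully populated part; the harvested case is handled
 * separately below.
 */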
3621 static void
3622 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3623 {
3624 	switch (adev->asic_type) {
3625 	case CHIP_FIJI:
3626 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3627 			  RB_XSEL2(1) | PKR_MAP(2) |
3628 			  PKR_XSEL(1) | PKR_YSEL(1) |
3629 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3630 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3631 			   SE_PAIR_YSEL(2);
3632 		break;
3633 	case CHIP_TONGA:
3634 	case CHIP_POLARIS10:
3635 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3636 			  SE_XSEL(1) | SE_YSEL(1);
3637 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3638 			   SE_PAIR_YSEL(2);
3639 		break;
3640 	case CHIP_TOPAZ:
3641 	case CHIP_CARRIZO:
3642 		*rconf |= RB_MAP_PKR0(2);
3643 		*rconf1 |= 0x0;
3644 		break;
3645 	case CHIP_POLARIS11:
3646 	case CHIP_POLARIS12:
3647 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3648 			  SE_XSEL(1) | SE_YSEL(1);
3649 		*rconf1 |= 0x0;
3650 		break;
3651 	case CHIP_STONEY:
3652 		*rconf |= 0x0;
3653 		*rconf1 |= 0x0;
3654 		break;
3655 	default:
3656 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3657 		break;
3658 	}
3659 }
3660 
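/*
 * With some RBs harvested, the default raster config would reference
 * missing backends.  Recompute the SE/PKR/RB map fields for each
 * shader engine so only the RBs present in rb_mask are used, write
 * the per-SE configs, then return GRBM_GFX_INDEX to broadcast mode.
 */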
3661 static void
3662 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3663 					u32 raster_config, u32 raster_config_1,
3664 					unsigned rb_mask, unsigned num_rb)
3665 {
3666 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3667 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3668 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3669 	unsigned rb_per_se = num_rb / num_se;
3670 	unsigned se_mask[4];
3671 	unsigned se;
3672 
3673 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3674 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3675 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3676 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3677 
3678 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3679 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3680 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3681 
3682 	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3683 			     (!se_mask[2] && !se_mask[3]))) {
3684 		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3685 
3686 		if (!se_mask[0] && !se_mask[1]) {
3687 			raster_config_1 |=
3688 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3689 		} else {
3690 			raster_config_1 |=
3691 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3692 		}
3693 	}
3694 
3695 	for (se = 0; se < num_se; se++) {
3696 		unsigned raster_config_se = raster_config;
3697 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3698 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3699 		int idx = (se / 2) * 2;
3700 
3701 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3702 			raster_config_se &= ~SE_MAP_MASK;
3703 
3704 			if (!se_mask[idx]) {
3705 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3706 			} else {
3707 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3708 			}
3709 		}
3710 
3711 		pkr0_mask &= rb_mask;
3712 		pkr1_mask &= rb_mask;
3713 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3714 			raster_config_se &= ~PKR_MAP_MASK;
3715 
3716 			if (!pkr0_mask) {
3717 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3718 			} else {
3719 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3720 			}
3721 		}
3722 
3723 		if (rb_per_se >= 2) {
3724 			unsigned rb0_mask = 1 << (se * rb_per_se);
3725 			unsigned rb1_mask = rb0_mask << 1;
3726 
3727 			rb0_mask &= rb_mask;
3728 			rb1_mask &= rb_mask;
3729 			if (!rb0_mask || !rb1_mask) {
3730 				raster_config_se &= ~RB_MAP_PKR0_MASK;
3731 
3732 				if (!rb0_mask) {
3733 					raster_config_se |=
3734 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3735 				} else {
3736 					raster_config_se |=
3737 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3738 				}
3739 			}
3740 
3741 			if (rb_per_se > 2) {
3742 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3743 				rb1_mask = rb0_mask << 1;
3744 				rb0_mask &= rb_mask;
3745 				rb1_mask &= rb_mask;
3746 				if (!rb0_mask || !rb1_mask) {
3747 					raster_config_se &= ~RB_MAP_PKR1_MASK;
3748 
3749 					if (!rb0_mask) {
3750 						raster_config_se |=
3751 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3752 					} else {
3753 						raster_config_se |=
3754 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3755 					}
3756 				}
3757 			}
3758 		}
3759 
3760 		/* GRBM_GFX_INDEX has a different offset on VI */
3761 		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3762 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3763 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3764 	}
3765 
3766 	/* GRBM_GFX_INDEX has a different offset on VI */
3767 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3768 }
3769 
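/*
 * Walk every SE/SH to collect the active-RB bitmap (also cached for
 * userspace queries) and program the raster configuration, using the
 * harvested variant when some RBs are fused off.
 */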
3770 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3771 {
3772 	int i, j;
3773 	u32 data;
3774 	u32 raster_config = 0, raster_config_1 = 0;
3775 	u32 active_rbs = 0;
3776 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3777 					adev->gfx.config.max_sh_per_se;
3778 	unsigned num_rb_pipes;
3779 
3780 	mutex_lock(&adev->grbm_idx_mutex);
3781 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3782 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3783 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3784 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3785 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3786 					       rb_bitmap_width_per_sh);
3787 		}
3788 	}
3789 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3790 
3791 	adev->gfx.config.backend_enable_mask = active_rbs;
3792 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3793 
3794 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3795 			     adev->gfx.config.max_shader_engines, 16);
3796 
3797 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3798 
3799 	if (!adev->gfx.config.backend_enable_mask ||
3800 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3801 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3802 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3803 	} else {
3804 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3805 							adev->gfx.config.backend_enable_mask,
3806 							num_rb_pipes);
3807 	}
3808 
3809 	/* cache the values for userspace */
3810 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3811 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3812 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3813 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3814 				RREG32(mmCC_RB_BACKEND_DISABLE);
3815 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3816 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3817 			adev->gfx.config.rb_config[i][j].raster_config =
3818 				RREG32(mmPA_SC_RASTER_CONFIG);
3819 			adev->gfx.config.rb_config[i][j].raster_config_1 =
3820 				RREG32(mmPA_SC_RASTER_CONFIG_1);
3821 		}
3822 	}
3823 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3824 	mutex_unlock(&adev->grbm_idx_mutex);
3825 }
3826 
3827 /**
3828  * gfx_v8_0_init_compute_vmid - init compute vmid
3829  *
3830  * @adev: amdgpu_device pointer
3831  *
3832  * Initialize compute vmid sh_mem registers
3833  *
3834  */
3835 #define DEFAULT_SH_MEM_BASES	(0x6000)
3836 #define FIRST_COMPUTE_VMID	(8)
3837 #define LAST_COMPUTE_VMID	(16)
3838 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3839 {
3840 	int i;
3841 	uint32_t sh_mem_config;
3842 	uint32_t sh_mem_bases;
3843 
3844 	/*
3845 	 * Configure apertures:
3846 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3847 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3848 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3849 	 */
3850 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3851 
3852 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3853 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3854 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3855 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3856 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3857 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3858 
3859 	mutex_lock(&adev->srbm_mutex);
3860 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3861 		vi_srbm_select(adev, 0, 0, 0, i);
3862 		/* CP and shaders */
3863 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3864 		WREG32(mmSH_MEM_APE1_BASE, 1);
3865 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3866 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3867 	}
3868 	vi_srbm_select(adev, 0, 0, 0, 0);
3869 	mutex_unlock(&adev->srbm_mutex);
3870 }
3871 
3872 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3873 {
3874 	switch (adev->asic_type) {
3875 	default:
3876 		adev->gfx.config.double_offchip_lds_buf = 1;
3877 		break;
3878 	case CHIP_CARRIZO:
3879 	case CHIP_STONEY:
3880 		adev->gfx.config.double_offchip_lds_buf = 0;
3881 		break;
3882 	}
3883 }
3884 
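/*
 * One-time engine setup: address config, tiling tables, RB and CU
 * state, per-VMID SH_MEM apertures, SC FIFO sizes and SPI arbitration
 * priorities.
 */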
3885 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3886 {
3887 	u32 tmp, sh_static_mem_cfg;
3888 	int i;
3889 
3890 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3891 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3892 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3893 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3894 
3895 	gfx_v8_0_tiling_mode_table_init(adev);
3896 	gfx_v8_0_setup_rb(adev);
3897 	gfx_v8_0_get_cu_info(adev);
3898 	gfx_v8_0_config_init(adev);
3899 
3900 	/* XXX SH_MEM regs */
3901 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3902 	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3903 				   SWIZZLE_ENABLE, 1);
3904 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3905 				   ELEMENT_SIZE, 1);
3906 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3907 				   INDEX_STRIDE, 3);
3908 	mutex_lock(&adev->srbm_mutex);
3909 	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3910 		vi_srbm_select(adev, 0, 0, 0, i);
3911 		/* CP and shaders */
3912 		if (i == 0) {
3913 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3914 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3915 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3916 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3917 			WREG32(mmSH_MEM_CONFIG, tmp);
3918 			WREG32(mmSH_MEM_BASES, 0);
3919 		} else {
3920 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3921 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3922 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3923 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3924 			WREG32(mmSH_MEM_CONFIG, tmp);
3925 			tmp = adev->mc.shared_aperture_start >> 48;
3926 			WREG32(mmSH_MEM_BASES, tmp);
3927 		}
3928 
3929 		WREG32(mmSH_MEM_APE1_BASE, 1);
3930 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3931 		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3932 	}
3933 	vi_srbm_select(adev, 0, 0, 0, 0);
3934 	mutex_unlock(&adev->srbm_mutex);
3935 
3936 	gfx_v8_0_init_compute_vmid(adev);
3937 
3938 	mutex_lock(&adev->grbm_idx_mutex);
3939 	/*
3940 	 * make sure that the following register writes are broadcast
3941 	 * to all of the shaders
3942 	 */
3943 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3944 
3945 	WREG32(mmPA_SC_FIFO_SIZE,
3946 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3947 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3948 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3949 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3950 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3951 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3952 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3953 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3954 
3955 	tmp = RREG32(mmSPI_ARB_PRIORITY);
3956 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3957 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3958 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3959 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3960 	WREG32(mmSPI_ARB_PRIORITY, tmp);
3961 
3962 	mutex_unlock(&adev->grbm_idx_mutex);
3963 
3964 }
3965 
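/*
 * Poll, for up to adev->usec_timeout iterations, until the RLC serdes
 * CU masters on every SE/SH report idle, then do the same for the
 * non-CU (SE/GC/TC) masters.
 */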
3966 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3967 {
3968 	u32 i, j, k;
3969 	u32 mask;
3970 
3971 	mutex_lock(&adev->grbm_idx_mutex);
3972 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3973 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3974 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3975 			for (k = 0; k < adev->usec_timeout; k++) {
3976 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3977 					break;
3978 				udelay(1);
3979 			}
3980 		}
3981 	}
3982 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3983 	mutex_unlock(&adev->grbm_idx_mutex);
3984 
3985 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3986 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3987 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3988 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3989 	for (k = 0; k < adev->usec_timeout; k++) {
3990 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3991 			break;
3992 		udelay(1);
3993 	}
3994 }
3995 
3996 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3997 					       bool enable)
3998 {
3999 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
4000 
4001 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
4002 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
4003 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
4004 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
4005 
4006 	WREG32(mmCP_INT_CNTL_RING0, tmp);
4007 }
4008 
4009 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
4010 {
4011 	/* csib */
4012 	WREG32(mmRLC_CSIB_ADDR_HI,
4013 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
4014 	WREG32(mmRLC_CSIB_ADDR_LO,
4015 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
4016 	WREG32(mmRLC_CSIB_LENGTH,
4017 			adev->gfx.rlc.clear_state_size);
4018 }
4019 
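/*
 * Compact the index words of the RLC indirect register list.  As
 * parsed here, the list is a series of blocks terminated by an
 * 0xFFFFFFFF marker; each entry within a block is three words, the
 * third being an index that is rewritten to its slot in the
 * unique_indices table.  Block start offsets land in ind_start_offsets.
 */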
4020 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4021 				int ind_offset,
4022 				int list_size,
4023 				int *unique_indices,
4024 				int *indices_count,
4025 				int max_indices,
4026 				int *ind_start_offsets,
4027 				int *offset_count,
4028 				int max_offset)
4029 {
4030 	int indices;
4031 	bool new_entry = true;
4032 
4033 	for (; ind_offset < list_size; ind_offset++) {
4034 
4035 		if (new_entry) {
4036 			new_entry = false;
4037 			ind_start_offsets[*offset_count] = ind_offset;
4038 			*offset_count = *offset_count + 1;
4039 			BUG_ON(*offset_count >= max_offset);
4040 		}
4041 
4042 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4043 			new_entry = true;
4044 			continue;
4045 		}
4046 
4047 		ind_offset += 2;
4048 
4049 		/* look for the matching index */
4050 		for (indices = 0;
4051 			indices < *indices_count;
4052 			indices++) {
4053 			if (unique_indices[indices] ==
4054 				register_list_format[ind_offset])
4055 				break;
4056 		}
4057 
4058 		if (indices >= *indices_count) {
4059 			unique_indices[*indices_count] =
4060 				register_list_format[ind_offset];
4061 			indices = *indices_count;
4062 			*indices_count = *indices_count + 1;
4063 			BUG_ON(*indices_count >= max_indices);
4064 		}
4065 
4066 		register_list_format[ind_offset] = indices;
4067 	}
4068 }
4069 
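/*
 * Upload the RLC save/restore machinery: the direct register-restore
 * list goes into SRM ARAM, the compacted indirect list plus its block
 * start offsets into GPM scratch, and the unique indices into the
 * RLC_SRM_INDEX_CNTL address/data register pairs.
 */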
4070 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4071 {
4072 	int i, temp, data;
4073 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4074 	int indices_count = 0;
4075 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4076 	int offset_count = 0;
4077 
4078 	int list_size;
4079 	unsigned int *register_list_format =
4080 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4081 	if (!register_list_format)
4082 		return -ENOMEM;
4083 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4084 			adev->gfx.rlc.reg_list_format_size_bytes);
4085 
4086 	gfx_v8_0_parse_ind_reg_list(register_list_format,
4087 				RLC_FormatDirectRegListLength,
4088 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4089 				unique_indices,
4090 				&indices_count,
4091 				ARRAY_SIZE(unique_indices),
4092 				indirect_start_offsets,
4093 				&offset_count,
4094 				ARRAY_SIZE(indirect_start_offsets));
4095 
4096 	/* save and restore list */
4097 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4098 
4099 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4100 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4101 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4102 
4103 	/* indirect list */
4104 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4105 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4106 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4107 
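	/*
	 * The restore list appears to be a sequence of register/value pairs,
	 * so the entry count handed to the RLC is half the dword count
	 * (hence the >> 1 below).
	 */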
4108 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4109 	list_size = list_size >> 1;
4110 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4111 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4112 
4113 	/* starting offsets */
4114 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
4115 		adev->gfx.rlc.starting_offsets_start);
4116 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4117 		WREG32(mmRLC_GPM_SCRATCH_DATA,
4118 				indirect_start_offsets[i]);
4119 
4120 	/* unique indices */
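	/*
	 * Each unique index appears to pack the register offset in its low
	 * bits (masked with 0x3FFFF for the ADDR register) and the data in
	 * its upper bits (shifted down by 20 for the DATA register); this is
	 * inferred from the mask/shift below, not from documentation.
	 */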
4121 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4122 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4123 	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4124 		if (unique_indices[i] != 0) {
4125 			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4126 			WREG32(data + i, unique_indices[i] >> 20);
4127 		}
4128 	}
4129 	kfree(register_list_format);
4130 
4131 	return 0;
4132 }
4133 
4134 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4135 {
4136 	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4137 }
4138 
4139 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4140 {
4141 	uint32_t data;
4142 
4143 	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4144 
4145 	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4146 	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4147 	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4148 	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4149 	WREG32(mmRLC_PG_DELAY, data);
4150 
4151 	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4152 	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4153 
4154 }
4155 
4156 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4157 						bool enable)
4158 {
4159 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4160 }
4161 
4162 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4163 						  bool enable)
4164 {
4165 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4166 }
4167 
4168 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4169 {
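	/* RLC_PG_CNTL.CP_PG_DISABLE is a disable bit, hence the inverted polarity */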
4170 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4171 }
4172 
4173 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4174 {
4175 	if ((adev->asic_type == CHIP_CARRIZO) ||
4176 	    (adev->asic_type == CHIP_STONEY)) {
4177 		gfx_v8_0_init_csb(adev);
4178 		gfx_v8_0_init_save_restore_list(adev);
4179 		gfx_v8_0_enable_save_restore_machine(adev);
4180 		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4181 		gfx_v8_0_init_power_gating(adev);
4182 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4183 	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4184 		   (adev->asic_type == CHIP_POLARIS12)) {
4185 		gfx_v8_0_init_csb(adev);
4186 		gfx_v8_0_init_save_restore_list(adev);
4187 		gfx_v8_0_enable_save_restore_machine(adev);
4188 		gfx_v8_0_init_power_gating(adev);
4189 	}
4190 
4191 }
4192 
4193 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4194 {
4195 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4196 
4197 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4198 	gfx_v8_0_wait_for_rlc_serdes(adev);
4199 }
4200 
4201 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4202 {
4203 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4204 	udelay(50);
4205 
4206 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4207 	udelay(50);
4208 }
4209 
4210 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4211 {
4212 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4213 
4214 	/* on APUs (e.g. Carrizo) the CP interrupt is enabled only after the CP is initialized */
4215 	if (!(adev->flags & AMD_IS_APU))
4216 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4217 
4218 	udelay(50);
4219 }
4220 
4221 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4222 {
4223 	const struct rlc_firmware_header_v2_0 *hdr;
4224 	const __le32 *fw_data;
4225 	unsigned i, fw_size;
4226 
4227 	if (!adev->gfx.rlc_fw)
4228 		return -EINVAL;
4229 
4230 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4231 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4232 
4233 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4234 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4235 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4236 
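	/*
	 * Standard VI ucode load sequence: writing 0 to the ADDR register
	 * resets the write pointer, each DATA write auto-increments it, and
	 * the final ADDR write appears to record the expected firmware
	 * version.  The CP PFP/CE/ME and MEC loaders below follow the same
	 * pattern.
	 */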
4237 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4238 	for (i = 0; i < fw_size; i++)
4239 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4240 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4241 
4242 	return 0;
4243 }
4244 
4245 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4246 {
4247 	int r;
4248 	u32 tmp;
4249 
4250 	gfx_v8_0_rlc_stop(adev);
4251 
4252 	/* disable CG */
4253 	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4254 	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4255 		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4256 	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4257 	if (adev->asic_type == CHIP_POLARIS11 ||
4258 	    adev->asic_type == CHIP_POLARIS10 ||
4259 	    adev->asic_type == CHIP_POLARIS12) {
4260 		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4261 		tmp &= ~0x3;
4262 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4263 	}
4264 
4265 	/* disable PG */
4266 	WREG32(mmRLC_PG_CNTL, 0);
4267 
4268 	gfx_v8_0_rlc_reset(adev);
4269 	gfx_v8_0_init_pg(adev);
4270 
4271 	if (!adev->pp_enabled) {
4272 		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4273 			/* legacy rlc firmware loading */
4274 			r = gfx_v8_0_rlc_load_microcode(adev);
4275 			if (r)
4276 				return r;
4277 		} else {
4278 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4279 							AMDGPU_UCODE_ID_RLC_G);
4280 			if (r)
4281 				return -EINVAL;
4282 		}
4283 	}
4284 
4285 	gfx_v8_0_rlc_start(adev);
4286 
4287 	return 0;
4288 }
4289 
4290 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4291 {
4292 	int i;
4293 	u32 tmp = RREG32(mmCP_ME_CNTL);
4294 
4295 	if (enable) {
4296 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4297 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4298 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4299 	} else {
4300 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4301 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4302 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4303 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4304 			adev->gfx.gfx_ring[i].ready = false;
4305 	}
4306 	WREG32(mmCP_ME_CNTL, tmp);
4307 	udelay(50);
4308 }
4309 
4310 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4311 {
4312 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4313 	const struct gfx_firmware_header_v1_0 *ce_hdr;
4314 	const struct gfx_firmware_header_v1_0 *me_hdr;
4315 	const __le32 *fw_data;
4316 	unsigned i, fw_size;
4317 
4318 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4319 		return -EINVAL;
4320 
4321 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4322 		adev->gfx.pfp_fw->data;
4323 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4324 		adev->gfx.ce_fw->data;
4325 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4326 		adev->gfx.me_fw->data;
4327 
4328 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4329 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4330 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4331 
4332 	gfx_v8_0_cp_gfx_enable(adev, false);
4333 
4334 	/* PFP */
4335 	fw_data = (const __le32 *)
4336 		(adev->gfx.pfp_fw->data +
4337 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4338 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4339 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4340 	for (i = 0; i < fw_size; i++)
4341 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4342 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4343 
4344 	/* CE */
4345 	fw_data = (const __le32 *)
4346 		(adev->gfx.ce_fw->data +
4347 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4348 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4349 	WREG32(mmCP_CE_UCODE_ADDR, 0);
4350 	for (i = 0; i < fw_size; i++)
4351 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4352 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4353 
4354 	/* ME */
4355 	fw_data = (const __le32 *)
4356 		(adev->gfx.me_fw->data +
4357 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4358 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4359 	WREG32(mmCP_ME_RAM_WADDR, 0);
4360 	for (i = 0; i < fw_size; i++)
4361 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4362 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4363 
4364 	return 0;
4365 }
4366 
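/*
 * Returns the number of ring dwords needed to emit the clear state buffer.
 * This accounting must stay in sync with the packets written by
 * gfx_v8_0_cp_gfx_start() below.
 */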
4367 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4368 {
4369 	u32 count = 0;
4370 	const struct cs_section_def *sect = NULL;
4371 	const struct cs_extent_def *ext = NULL;
4372 
4373 	/* begin clear state */
4374 	count += 2;
4375 	/* context control state */
4376 	count += 3;
4377 
4378 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4379 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4380 			if (sect->id == SECT_CONTEXT)
4381 				count += 2 + ext->reg_count;
4382 			else
4383 				return 0;
4384 		}
4385 	}
4386 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4387 	count += 4;
4388 	/* end clear state */
4389 	count += 2;
4390 	/* clear state */
4391 	count += 2;
4392 
4393 	return count;
4394 }
4395 
4396 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4397 {
4398 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4399 	const struct cs_section_def *sect = NULL;
4400 	const struct cs_extent_def *ext = NULL;
4401 	int r, i;
4402 
4403 	/* init the CP */
4404 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4405 	WREG32(mmCP_ENDIAN_SWAP, 0);
4406 	WREG32(mmCP_DEVICE_ID, 1);
4407 
4408 	gfx_v8_0_cp_gfx_enable(adev, true);
4409 
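	/*
	 * The extra 4 dwords on top of the CSB size cover the trailing
	 * SET_BASE packet that programs the CE partitions at the end of
	 * this function.
	 */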
4410 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4411 	if (r) {
4412 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4413 		return r;
4414 	}
4415 
4416 	/* clear state buffer */
4417 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4418 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4419 
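	/*
	 * CONTEXT_CONTROL: bit 31 of each dword appears to be the
	 * load/shadow enable bit for the corresponding mask; the magic
	 * values are kept as-is.
	 */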
4420 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4421 	amdgpu_ring_write(ring, 0x80000000);
4422 	amdgpu_ring_write(ring, 0x80000000);
4423 
4424 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4425 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4426 			if (sect->id == SECT_CONTEXT) {
4427 				amdgpu_ring_write(ring,
4428 				       PACKET3(PACKET3_SET_CONTEXT_REG,
4429 					       ext->reg_count));
4430 				amdgpu_ring_write(ring,
4431 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4432 				for (i = 0; i < ext->reg_count; i++)
4433 					amdgpu_ring_write(ring, ext->extent[i]);
4434 			}
4435 		}
4436 	}
4437 
4438 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4439 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4440 	switch (adev->asic_type) {
4441 	case CHIP_TONGA:
4442 	case CHIP_POLARIS10:
4443 		amdgpu_ring_write(ring, 0x16000012);
4444 		amdgpu_ring_write(ring, 0x0000002A);
4445 		break;
4446 	case CHIP_POLARIS11:
4447 	case CHIP_POLARIS12:
4448 		amdgpu_ring_write(ring, 0x16000012);
4449 		amdgpu_ring_write(ring, 0x00000000);
4450 		break;
4451 	case CHIP_FIJI:
4452 		amdgpu_ring_write(ring, 0x3a00161a);
4453 		amdgpu_ring_write(ring, 0x0000002e);
4454 		break;
4455 	case CHIP_CARRIZO:
4456 		amdgpu_ring_write(ring, 0x00000002);
4457 		amdgpu_ring_write(ring, 0x00000000);
4458 		break;
4459 	case CHIP_TOPAZ:
4460 		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4461 				0x00000000 : 0x00000002);
4462 		amdgpu_ring_write(ring, 0x00000000);
4463 		break;
4464 	case CHIP_STONEY:
4465 		amdgpu_ring_write(ring, 0x00000000);
4466 		amdgpu_ring_write(ring, 0x00000000);
4467 		break;
4468 	default:
4469 		BUG();
4470 	}
4471 
4472 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4473 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4474 
4475 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4476 	amdgpu_ring_write(ring, 0);
4477 
4478 	/* init the CE partitions */
4479 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4480 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4481 	amdgpu_ring_write(ring, 0x8000);
4482 	amdgpu_ring_write(ring, 0x8000);
4483 
4484 	amdgpu_ring_commit(ring);
4485 
4486 	return 0;
4487 }
4488 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4489 {
4490 	u32 tmp;
4491 	/* no gfx doorbells on iceland */
4492 	if (adev->asic_type == CHIP_TOPAZ)
4493 		return;
4494 
4495 	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4496 
4497 	if (ring->use_doorbell) {
4498 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4499 				DOORBELL_OFFSET, ring->doorbell_index);
4500 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4501 						DOORBELL_HIT, 0);
4502 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4503 					    DOORBELL_EN, 1);
4504 	} else {
4505 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4506 	}
4507 
4508 	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4509 
4510 	if (adev->flags & AMD_IS_APU)
4511 		return;
4512 
4513 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4514 					DOORBELL_RANGE_LOWER,
4515 					AMDGPU_DOORBELL_GFX_RING0);
4516 	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4517 
4518 	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4519 		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4520 }
4521 
4522 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4523 {
4524 	struct amdgpu_ring *ring;
4525 	u32 tmp;
4526 	u32 rb_bufsz;
4527 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4528 	int r;
4529 
4530 	/* Set the write pointer delay */
4531 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4532 
4533 	/* set the RB to use vmid 0 */
4534 	WREG32(mmCP_RB_VMID, 0);
4535 
4536 	/* Set ring buffer size */
4537 	ring = &adev->gfx.gfx_ring[0];
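	/* RB_BUFSZ encodes log2 of the ring size in 8-byte (qword) units */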
4538 	rb_bufsz = order_base_2(ring->ring_size / 8);
4539 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4540 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4541 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4542 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4543 #ifdef __BIG_ENDIAN
4544 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4545 #endif
4546 	WREG32(mmCP_RB0_CNTL, tmp);
4547 
4548 	/* Initialize the ring buffer's read and write pointers */
4549 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4550 	ring->wptr = 0;
4551 	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4552 
4553 	/* set the wb address whether it's enabled or not */
4554 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4555 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4556 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4557 
4558 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4559 	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4560 	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4561 	mdelay(1);
4562 	WREG32(mmCP_RB0_CNTL, tmp);
4563 
4564 	rb_addr = ring->gpu_addr >> 8;
4565 	WREG32(mmCP_RB0_BASE, rb_addr);
4566 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4567 
4568 	gfx_v8_0_set_cpg_door_bell(adev, ring);
4569 	/* start the ring */
4570 	amdgpu_ring_clear_ring(ring);
4571 	gfx_v8_0_cp_gfx_start(adev);
4572 	ring->ready = true;
4573 	r = amdgpu_ring_test_ring(ring);
4574 	if (r)
4575 		ring->ready = false;
4576 
4577 	return r;
4578 }
4579 
4580 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4581 {
4582 	int i;
4583 
4584 	if (enable) {
4585 		WREG32(mmCP_MEC_CNTL, 0);
4586 	} else {
4587 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4588 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4589 			adev->gfx.compute_ring[i].ready = false;
4590 		adev->gfx.kiq.ring.ready = false;
4591 	}
4592 	udelay(50);
4593 }
4594 
4595 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4596 {
4597 	const struct gfx_firmware_header_v1_0 *mec_hdr;
4598 	const __le32 *fw_data;
4599 	unsigned i, fw_size;
4600 
4601 	if (!adev->gfx.mec_fw)
4602 		return -EINVAL;
4603 
4604 	gfx_v8_0_cp_compute_enable(adev, false);
4605 
4606 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4607 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4608 
4609 	fw_data = (const __le32 *)
4610 		(adev->gfx.mec_fw->data +
4611 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4612 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4613 
4614 	/* MEC1 */
4615 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4616 	for (i = 0; i < fw_size; i++)
4617 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4618 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4619 
4620 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4621 	if (adev->gfx.mec2_fw) {
4622 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4623 
4624 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4625 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4626 
4627 		fw_data = (const __le32 *)
4628 			(adev->gfx.mec2_fw->data +
4629 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4630 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4631 
4632 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4633 		for (i = 0; i < fw_size; i++)
4634 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4635 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4636 	}
4637 
4638 	return 0;
4639 }
4640 
4641 /* KIQ functions */
4642 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4643 {
4644 	uint32_t tmp;
4645 	struct amdgpu_device *adev = ring->adev;
4646 
4647 	/* tell the RLC which queue is the KIQ */
4648 	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4649 	tmp &= 0xffffff00;
4650 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4651 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
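	/* program the queue first, then set bit 7 with a second write (this appears to latch/enable the entry) */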
4652 	tmp |= 0x80;
4653 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4654 }
4655 
4656 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4657 {
4658 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4659 	uint32_t scratch, tmp = 0;
4660 	int r, i;
4661 
4662 	r = amdgpu_gfx_scratch_get(adev, &scratch);
4663 	if (r) {
4664 		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4665 		return r;
4666 	}
4667 	WREG32(scratch, 0xCAFEDEAD);
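	/*
	 * Completion handshake: the KIQ overwrites the 0xCAFEDEAD sentinel
	 * with 0xDEADBEEF via the trailing SET_UCONFIG_REG packet, so
	 * polling the scratch register shows when the MAP_QUEUES packets
	 * have been processed.
	 */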
4668 
4669 	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4670 	if (r) {
4671 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4672 		amdgpu_gfx_scratch_free(adev, scratch);
4673 		return r;
4674 	}
4675 	/* set resources */
4676 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4677 	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4678 	amdgpu_ring_write(kiq_ring, 0x000000FF);	/* queue mask lo */
4679 	amdgpu_ring_write(kiq_ring, 0);	/* queue mask hi */
4680 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4681 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4682 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4683 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4684 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4685 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4686 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4687 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4688 
4689 		/* map queues */
4690 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4691 		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
4692 		amdgpu_ring_write(kiq_ring,
4693 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4694 		amdgpu_ring_write(kiq_ring,
4695 				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4696 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4697 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4698 				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4699 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4700 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4701 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4702 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4703 	}
4704 	/* write to scratch for completion */
4705 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4706 	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4707 	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4708 	amdgpu_ring_commit(kiq_ring);
4709 
4710 	for (i = 0; i < adev->usec_timeout; i++) {
4711 		tmp = RREG32(scratch);
4712 		if (tmp == 0xDEADBEEF)
4713 			break;
4714 		DRM_UDELAY(1);
4715 	}
4716 	if (i >= adev->usec_timeout) {
4717 		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4718 			  scratch, tmp);
4719 		r = -EINVAL;
4720 	}
4721 	amdgpu_gfx_scratch_free(adev, scratch);
4722 
4723 	return r;
4724 }
4725 
4726 static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
4727 {
4728 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4729 	uint32_t scratch, tmp = 0;
4730 	int r, i;
4731 
4732 	r = amdgpu_gfx_scratch_get(adev, &scratch);
4733 	if (r) {
4734 		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4735 		return r;
4736 	}
4737 	WREG32(scratch, 0xCAFEDEAD);
4738 
4739 	r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
4740 	if (r) {
4741 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4742 		amdgpu_gfx_scratch_free(adev, scratch);
4743 		return r;
4744 	}
4745 	/* unmap queues */
4746 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4747 	amdgpu_ring_write(kiq_ring,
4748 			  PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4749 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
4750 	amdgpu_ring_write(kiq_ring, 0);
4751 	amdgpu_ring_write(kiq_ring, 0);
4752 	amdgpu_ring_write(kiq_ring, 0);
4753 	amdgpu_ring_write(kiq_ring, 0);
4754 	/* write to scratch for completion */
4755 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4756 	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4757 	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4758 	amdgpu_ring_commit(kiq_ring);
4759 
4760 	for (i = 0; i < adev->usec_timeout; i++) {
4761 		tmp = RREG32(scratch);
4762 		if (tmp == 0xDEADBEEF)
4763 			break;
4764 		DRM_UDELAY(1);
4765 	}
4766 	if (i >= adev->usec_timeout) {
4767 		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n",
4768 			  scratch, tmp);
4769 		r = -EINVAL;
4770 	}
4771 	amdgpu_gfx_scratch_free(adev, scratch);
4772 
4773 	return r;
4774 }
4775 
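/*
 * Fill the memory queue descriptor (MQD) for a compute queue.  The fields
 * mirror the CP_HQD_* registers; the MQD is built in host-visible memory
 * here and committed to the hardware later, either directly via
 * gfx_v8_0_kiq_init_register() or by the KIQ through MAP_QUEUES packets.
 */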
4776 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4777 {
4778 	struct amdgpu_device *adev = ring->adev;
4779 	struct vi_mqd *mqd = ring->mqd_ptr;
4780 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4781 	uint32_t tmp;
4782 
4783 	mqd->header = 0xC0310800;
4784 	mqd->compute_pipelinestat_enable = 0x00000001;
4785 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4786 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4787 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4788 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4789 	mqd->compute_misc_reserved = 0x00000003;
4790 
4791 	eop_base_addr = ring->eop_gpu_addr >> 8;
4792 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4793 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4794 
4795 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4796 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4797 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4798 			(order_base_2(MEC_HPD_SIZE / 4) - 1));
4799 
4800 	mqd->cp_hqd_eop_control = tmp;
4801 
4802 	/* enable doorbell? */
4803 	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4804 			    CP_HQD_PQ_DOORBELL_CONTROL,
4805 			    DOORBELL_EN,
4806 			    ring->use_doorbell ? 1 : 0);
4807 
4808 	mqd->cp_hqd_pq_doorbell_control = tmp;
4809 
4810 	/* disable the queue if it's active */
4811 	mqd->cp_hqd_dequeue_request = 0;
4812 	mqd->cp_hqd_pq_rptr = 0;
4813 	mqd->cp_hqd_pq_wptr = 0;
4814 
4815 	/* set the pointer to the MQD */
4816 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4817 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4818 
4819 	/* set MQD vmid to 0 */
4820 	tmp = RREG32(mmCP_MQD_CONTROL);
4821 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4822 	mqd->cp_mqd_control = tmp;
4823 
4824 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4825 	hqd_gpu_addr = ring->gpu_addr >> 8;
4826 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4827 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4828 
4829 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4830 	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4831 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4832 			    (order_base_2(ring->ring_size / 4) - 1));
4833 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4834 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4835 #ifdef __BIG_ENDIAN
4836 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4837 #endif
4838 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4839 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4840 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4841 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4842 	mqd->cp_hqd_pq_control = tmp;
4843 
4844 	/* set the wb address whether it's enabled or not */
4845 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4846 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4847 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4848 		upper_32_bits(wb_gpu_addr) & 0xffff;
4849 
4850 	/* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4851 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4852 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4853 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4854 
4855 	tmp = 0;
4856 	/* enable the doorbell if requested */
4857 	if (ring->use_doorbell) {
4858 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4859 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4860 				DOORBELL_OFFSET, ring->doorbell_index);
4861 
4862 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4863 					 DOORBELL_EN, 1);
4864 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4865 					 DOORBELL_SOURCE, 0);
4866 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4867 					 DOORBELL_HIT, 0);
4868 	}
4869 
4870 	mqd->cp_hqd_pq_doorbell_control = tmp;
4871 
4872 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4873 	ring->wptr = 0;
4874 	mqd->cp_hqd_pq_wptr = ring->wptr;
4875 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4876 
4877 	/* set the vmid for the queue */
4878 	mqd->cp_hqd_vmid = 0;
4879 
4880 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4881 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4882 	mqd->cp_hqd_persistent_state = tmp;
4883 
4884 	/* set min IB avail size and MTYPE */
4885 	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4886 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4887 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4888 	mqd->cp_hqd_ib_control = tmp;
4889 
4890 	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4891 	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4892 	mqd->cp_hqd_iq_timer = tmp;
4893 
4894 	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4895 	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4896 	mqd->cp_hqd_ctx_save_control = tmp;
4897 
4898 	/* activate the queue */
4899 	mqd->cp_hqd_active = 1;
4900 
4901 	return 0;
4902 }
4903 
4904 static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
4905 {
4906 	struct amdgpu_device *adev = ring->adev;
4907 	struct vi_mqd *mqd = ring->mqd_ptr;
4908 	int j;
4909 
4910 	/* disable wptr polling */
4911 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4912 
4913 	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
4914 	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
4915 
4916 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4917 	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
4918 
4919 	/* enable doorbell? */
4920 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4921 
4922 	/* disable the queue if it's active */
4923 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4924 		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4925 		for (j = 0; j < adev->usec_timeout; j++) {
4926 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4927 				break;
4928 			udelay(1);
4929 		}
4930 		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4931 		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4932 		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4933 	}
4934 
4935 	/* set the pointer to the MQD */
4936 	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4937 	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4938 
4939 	/* set MQD vmid to 0 */
4940 	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
4941 
4942 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4943 	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4944 	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4945 
4946 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4947 	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
4948 
4949 	/* set the wb address whether it's enabled or not */
4950 	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4951 				mqd->cp_hqd_pq_rptr_report_addr_lo);
4952 	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4953 				mqd->cp_hqd_pq_rptr_report_addr_hi);
4954 
4955 	/* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4956 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4957 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
4958 
4959 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4960 
4961 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4962 	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4963 
4964 	/* set the vmid for the queue */
4965 	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4966 
4967 	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
4968 
4969 	/* activate the queue */
4970 	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4971 
4972 	return 0;
4973 }
4974 
4975 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4976 {
4977 	struct amdgpu_device *adev = ring->adev;
4978 	struct vi_mqd *mqd = ring->mqd_ptr;
4979 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4980 
4981 	gfx_v8_0_kiq_setting(ring);
4982 
4983 	if (adev->gfx.in_reset) { /* for GPU_RESET case */
4984 		/* reset MQD to a clean status */
4985 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4986 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4987 
4988 		/* reset ring buffer */
4989 		ring->wptr = 0;
4990 		amdgpu_ring_clear_ring(ring);
4991 		mutex_lock(&adev->srbm_mutex);
4992 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4993 		gfx_v8_0_kiq_init_register(ring);
4994 		vi_srbm_select(adev, 0, 0, 0, 0);
4995 		mutex_unlock(&adev->srbm_mutex);
4996 	} else {
4997 		memset((void *)mqd, 0, sizeof(*mqd));
4998 		mutex_lock(&adev->srbm_mutex);
4999 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5000 		gfx_v8_0_mqd_init(ring);
5001 		gfx_v8_0_kiq_init_register(ring);
5002 		vi_srbm_select(adev, 0, 0, 0, 0);
5003 		mutex_unlock(&adev->srbm_mutex);
5004 
5005 		if (adev->gfx.mec.mqd_backup[mqd_idx])
5006 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
5007 	}
5008 
5009 	return 0;
5010 }
5011 
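/*
 * Three cases: on first init the MQD is built from scratch and backed up;
 * on GPU reset it is restored from the backup and the ring buffer is
 * cleared; on resume from suspend the saved MQD is reused and only the
 * ring contents are cleared.
 */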
5012 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
5013 {
5014 	struct amdgpu_device *adev = ring->adev;
5015 	struct vi_mqd *mqd = ring->mqd_ptr;
5016 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
5017 
5018 	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
5019 		memset((void *)mqd, 0, sizeof(*mqd));
5020 		mutex_lock(&adev->srbm_mutex);
5021 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5022 		gfx_v8_0_mqd_init(ring);
5023 		vi_srbm_select(adev, 0, 0, 0, 0);
5024 		mutex_unlock(&adev->srbm_mutex);
5025 
5026 		if (adev->gfx.mec.mqd_backup[mqd_idx])
5027 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
5028 	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
5029 		/* reset MQD to a clean status */
5030 		if (adev->gfx.mec.mqd_backup[mqd_idx])
5031 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
5032 		/* reset ring buffer */
5033 		ring->wptr = 0;
5034 		amdgpu_ring_clear_ring(ring);
5035 	} else {
5036 		amdgpu_ring_clear_ring(ring);
5037 	}
5038 	return 0;
5039 }
5040 
5041 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
5042 {
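	/* relies on the declaration order of amd_asic_type: VI parts after Tonga get the extended MEC doorbell range */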
5043 	if (adev->asic_type > CHIP_TONGA) {
5044 		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
5045 		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
5046 	}
5047 	/* enable doorbells */
5048 	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
5049 }
5050 
5051 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
5052 {
5053 	struct amdgpu_ring *ring = NULL;
5054 	int r = 0, i;
5055 
5056 	gfx_v8_0_cp_compute_enable(adev, true);
5057 
5058 	ring = &adev->gfx.kiq.ring;
5059 
5060 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
5061 	if (unlikely(r != 0))
5062 		goto done;
5063 
5064 	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
5065 	if (!r) {
5066 		r = gfx_v8_0_kiq_init_queue(ring);
5067 		amdgpu_bo_kunmap(ring->mqd_obj);
5068 		ring->mqd_ptr = NULL;
5069 	}
5070 	amdgpu_bo_unreserve(ring->mqd_obj);
5071 	if (r)
5072 		goto done;
5073 
5074 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5075 		ring = &adev->gfx.compute_ring[i];
5076 
5077 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
5078 		if (unlikely(r != 0))
5079 			goto done;
5080 		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
5081 		if (!r) {
5082 			r = gfx_v8_0_kcq_init_queue(ring);
5083 			amdgpu_bo_kunmap(ring->mqd_obj);
5084 			ring->mqd_ptr = NULL;
5085 		}
5086 		amdgpu_bo_unreserve(ring->mqd_obj);
5087 		if (r)
5088 			goto done;
5089 	}
5090 
5091 	gfx_v8_0_set_mec_doorbell_range(adev);
5092 
5093 	r = gfx_v8_0_kiq_kcq_enable(adev);
5094 	if (r)
5095 		goto done;
5096 
5097 	/* Test KIQ */
5098 	ring = &adev->gfx.kiq.ring;
5099 	ring->ready = true;
5100 	r = amdgpu_ring_test_ring(ring);
5101 	if (r) {
5102 		ring->ready = false;
5103 		goto done;
5104 	}
5105 
5106 	/* Test KCQs */
5107 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5108 		ring = &adev->gfx.compute_ring[i];
5109 		ring->ready = true;
5110 		r = amdgpu_ring_test_ring(ring);
5111 		if (r)
5112 			ring->ready = false;
5113 	}
5114 
5115 done:
5116 	return r;
5117 }
5118 
5119 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5120 {
5121 	int r;
5122 
5123 	if (!(adev->flags & AMD_IS_APU))
5124 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5125 
5126 	if (!adev->pp_enabled) {
5127 		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
5128 			/* legacy firmware loading */
5129 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
5130 			if (r)
5131 				return r;
5132 
5133 			r = gfx_v8_0_cp_compute_load_microcode(adev);
5134 			if (r)
5135 				return r;
5136 		} else {
5137 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5138 							AMDGPU_UCODE_ID_CP_CE);
5139 			if (r)
5140 				return -EINVAL;
5141 
5142 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5143 							AMDGPU_UCODE_ID_CP_PFP);
5144 			if (r)
5145 				return -EINVAL;
5146 
5147 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5148 							AMDGPU_UCODE_ID_CP_ME);
5149 			if (r)
5150 				return -EINVAL;
5151 
5152 			if (adev->asic_type == CHIP_TOPAZ) {
5153 				r = gfx_v8_0_cp_compute_load_microcode(adev);
5154 				if (r)
5155 					return r;
5156 			} else {
5157 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5158 										 AMDGPU_UCODE_ID_CP_MEC1);
5159 				if (r)
5160 					return -EINVAL;
5161 			}
5162 		}
5163 	}
5164 
5165 	r = gfx_v8_0_cp_gfx_resume(adev);
5166 	if (r)
5167 		return r;
5168 
5169 	r = gfx_v8_0_kiq_resume(adev);
5170 	if (r)
5171 		return r;
5172 
5173 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5174 
5175 	return 0;
5176 }
5177 
5178 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5179 {
5180 	gfx_v8_0_cp_gfx_enable(adev, enable);
5181 	gfx_v8_0_cp_compute_enable(adev, enable);
5182 }
5183 
5184 static int gfx_v8_0_hw_init(void *handle)
5185 {
5186 	int r;
5187 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5188 
5189 	gfx_v8_0_init_golden_registers(adev);
5190 	gfx_v8_0_gpu_init(adev);
5191 
5192 	r = gfx_v8_0_rlc_resume(adev);
5193 	if (r)
5194 		return r;
5195 
5196 	r = gfx_v8_0_cp_resume(adev);
5197 
5198 	return r;
5199 }
5200 
5201 static int gfx_v8_0_hw_fini(void *handle)
5202 {
5203 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5204 
5205 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5206 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5207 	if (amdgpu_sriov_vf(adev)) {
5208 		pr_debug("For SRIOV client, shouldn't do anything.\n");
5209 		return 0;
5210 	}
5211 	gfx_v8_0_kiq_kcq_disable(adev);
5212 	gfx_v8_0_cp_enable(adev, false);
5213 	gfx_v8_0_rlc_stop(adev);
5214 
5215 	amdgpu_set_powergating_state(adev,
5216 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5217 
5218 	return 0;
5219 }
5220 
5221 static int gfx_v8_0_suspend(void *handle)
5222 {
5223 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5224 	adev->gfx.in_suspend = true;
5225 	return gfx_v8_0_hw_fini(adev);
5226 }
5227 
5228 static int gfx_v8_0_resume(void *handle)
5229 {
5230 	int r;
5231 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5232 
5233 	r = gfx_v8_0_hw_init(adev);
5234 	adev->gfx.in_suspend = false;
5235 	return r;
5236 }
5237 
5238 static bool gfx_v8_0_is_idle(void *handle)
5239 {
5240 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5241 
5242 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5243 		return false;
5244 	else
5245 		return true;
5246 }
5247 
5248 static int gfx_v8_0_wait_for_idle(void *handle)
5249 {
5250 	unsigned i;
5251 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5252 
5253 	for (i = 0; i < adev->usec_timeout; i++) {
5254 		if (gfx_v8_0_is_idle(handle))
5255 			return 0;
5256 
5257 		udelay(1);
5258 	}
5259 	return -ETIMEDOUT;
5260 }
5261 
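/*
 * Determine which blocks need a soft reset and stash the computed
 * GRBM/SRBM reset masks in adev->gfx for the pre/soft/post reset stages
 * that follow.
 */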
5262 static bool gfx_v8_0_check_soft_reset(void *handle)
5263 {
5264 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5265 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5266 	u32 tmp;
5267 
5268 	/* GRBM_STATUS */
5269 	tmp = RREG32(mmGRBM_STATUS);
5270 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5271 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5272 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5273 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5274 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5275 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5276 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5277 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5278 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5279 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5280 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5281 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5282 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5283 	}
5284 
5285 	/* GRBM_STATUS2 */
5286 	tmp = RREG32(mmGRBM_STATUS2);
5287 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5288 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5289 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5290 
5291 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5292 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5293 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5294 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5295 						SOFT_RESET_CPF, 1);
5296 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5297 						SOFT_RESET_CPC, 1);
5298 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5299 						SOFT_RESET_CPG, 1);
5300 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5301 						SOFT_RESET_GRBM, 1);
5302 	}
5303 
5304 	/* SRBM_STATUS */
5305 	tmp = RREG32(mmSRBM_STATUS);
5306 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5307 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5308 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5309 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5310 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5311 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5312 
5313 	if (grbm_soft_reset || srbm_soft_reset) {
5314 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5315 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5316 		return true;
5317 	} else {
5318 		adev->gfx.grbm_soft_reset = 0;
5319 		adev->gfx.srbm_soft_reset = 0;
5320 		return false;
5321 	}
5322 }
5323 
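/*
 * Ask the CP to dequeue the currently selected HQD and wait for it to go
 * idle.  req is the CP_HQD_DEQUEUE_REQUEST code; callers here pass 2,
 * which requests RESET_WAVES rather than a pipe drain (per the usual CP
 * dequeue request encoding; treat this as an assumption).
 */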
5324 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
5325 {
5326 	int i, r = 0;
5327 
5328 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5329 		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
5330 		for (i = 0; i < adev->usec_timeout; i++) {
5331 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5332 				break;
5333 			udelay(1);
5334 		}
5335 		if (i == adev->usec_timeout)
5336 			r = -ETIMEDOUT;
5337 	}
5338 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5339 	WREG32(mmCP_HQD_PQ_RPTR, 0);
5340 	WREG32(mmCP_HQD_PQ_WPTR, 0);
5341 
5342 	return r;
5343 }
5344 
5345 static int gfx_v8_0_pre_soft_reset(void *handle)
5346 {
5347 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5348 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5349 
5350 	if ((!adev->gfx.grbm_soft_reset) &&
5351 	    (!adev->gfx.srbm_soft_reset))
5352 		return 0;
5353 
5354 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5355 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5356 
5357 	/* stop the rlc */
5358 	gfx_v8_0_rlc_stop(adev);
5359 
5360 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5361 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5362 		/* Disable GFX parsing/prefetching */
5363 		gfx_v8_0_cp_gfx_enable(adev, false);
5364 
5365 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5366 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5367 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5368 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5369 		int i;
5370 
5371 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5372 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5373 
5374 			mutex_lock(&adev->srbm_mutex);
5375 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5376 			gfx_v8_0_deactivate_hqd(adev, 2);
5377 			vi_srbm_select(adev, 0, 0, 0, 0);
5378 			mutex_unlock(&adev->srbm_mutex);
5379 		}
5380 		/* Disable MEC parsing/prefetching */
5381 		gfx_v8_0_cp_compute_enable(adev, false);
5382 	}
5383 
5384 	return 0;
5385 }
5386 
5387 static int gfx_v8_0_soft_reset(void *handle)
5388 {
5389 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5390 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5391 	u32 tmp;
5392 
5393 	if ((!adev->gfx.grbm_soft_reset) &&
5394 	    (!adev->gfx.srbm_soft_reset))
5395 		return 0;
5396 
5397 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5398 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5399 
5400 	if (grbm_soft_reset || srbm_soft_reset) {
5401 		tmp = RREG32(mmGMCON_DEBUG);
5402 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5403 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5404 		WREG32(mmGMCON_DEBUG, tmp);
5405 		udelay(50);
5406 	}
5407 
5408 	if (grbm_soft_reset) {
5409 		tmp = RREG32(mmGRBM_SOFT_RESET);
5410 		tmp |= grbm_soft_reset;
5411 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5412 		WREG32(mmGRBM_SOFT_RESET, tmp);
5413 		tmp = RREG32(mmGRBM_SOFT_RESET);
5414 
5415 		udelay(50);
5416 
5417 		tmp &= ~grbm_soft_reset;
5418 		WREG32(mmGRBM_SOFT_RESET, tmp);
5419 		tmp = RREG32(mmGRBM_SOFT_RESET);
5420 	}
5421 
5422 	if (srbm_soft_reset) {
5423 		tmp = RREG32(mmSRBM_SOFT_RESET);
5424 		tmp |= srbm_soft_reset;
5425 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5426 		WREG32(mmSRBM_SOFT_RESET, tmp);
5427 		tmp = RREG32(mmSRBM_SOFT_RESET);
5428 
5429 		udelay(50);
5430 
5431 		tmp &= ~srbm_soft_reset;
5432 		WREG32(mmSRBM_SOFT_RESET, tmp);
5433 		tmp = RREG32(mmSRBM_SOFT_RESET);
5434 	}
5435 
5436 	if (grbm_soft_reset || srbm_soft_reset) {
5437 		tmp = RREG32(mmGMCON_DEBUG);
5438 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5439 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5440 		WREG32(mmGMCON_DEBUG, tmp);
5441 	}
5442 
5443 	/* Wait a little for things to settle down */
5444 	udelay(50);
5445 
5446 	return 0;
5447 }
5448 
5449 static int gfx_v8_0_post_soft_reset(void *handle)
5450 {
5451 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5452 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5453 
5454 	if ((!adev->gfx.grbm_soft_reset) &&
5455 	    (!adev->gfx.srbm_soft_reset))
5456 		return 0;
5457 
5458 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5459 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5460 
5461 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5462 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5463 		gfx_v8_0_cp_gfx_resume(adev);
5464 
5465 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5466 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5467 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5468 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5469 		int i;
5470 
5471 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5472 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5473 
5474 			mutex_lock(&adev->srbm_mutex);
5475 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5476 			gfx_v8_0_deactivate_hqd(adev, 2);
5477 			vi_srbm_select(adev, 0, 0, 0, 0);
5478 			mutex_unlock(&adev->srbm_mutex);
5479 		}
5480 		gfx_v8_0_kiq_resume(adev);
5481 	}
5482 	gfx_v8_0_rlc_start(adev);
5483 
5484 	return 0;
5485 }
5486 
5487 /**
5488  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5489  *
5490  * @adev: amdgpu_device pointer
5491  *
5492  * Fetches and returns a 64-bit snapshot of the GPU clock counter.
5494  */
5495 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5496 {
5497 	uint64_t clock;
5498 
5499 	mutex_lock(&adev->gfx.gpu_clock_mutex);
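	/* writing the capture bit latches the free-running counter into the LSB/MSB registers */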
5500 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5501 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5502 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5503 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5504 	return clock;
5505 }
5506 
5507 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5508 					  uint32_t vmid,
5509 					  uint32_t gds_base, uint32_t gds_size,
5510 					  uint32_t gws_base, uint32_t gws_size,
5511 					  uint32_t oa_base, uint32_t oa_size)
5512 {
5513 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5514 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5515 
5516 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5517 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5518 
5519 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5520 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5521 
5522 	/* GDS Base */
5523 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5524 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5525 				WRITE_DATA_DST_SEL(0)));
5526 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5527 	amdgpu_ring_write(ring, 0);
5528 	amdgpu_ring_write(ring, gds_base);
5529 
5530 	/* GDS Size */
5531 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5532 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5533 				WRITE_DATA_DST_SEL(0)));
5534 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5535 	amdgpu_ring_write(ring, 0);
5536 	amdgpu_ring_write(ring, gds_size);
5537 
5538 	/* GWS */
5539 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5540 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5541 				WRITE_DATA_DST_SEL(0)));
5542 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5543 	amdgpu_ring_write(ring, 0);
5544 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5545 
5546 	/* OA */
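	/* the value written below is a contiguous mask of oa_size bits starting at bit oa_base */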
5547 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5548 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5549 				WRITE_DATA_DST_SEL(0)));
5550 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5551 	amdgpu_ring_write(ring, 0);
5552 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5553 }
5554 
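/*
 * SQ indirect register access: program SQ_IND_INDEX with the wave/SIMD
 * selection and a register index, then read the value back through
 * SQ_IND_DATA.  wave_read_regs() additionally sets AUTO_INCR so that
 * consecutive reads walk a register range (used for the SGPR dump below).
 */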
5555 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5556 {
5557 	WREG32(mmSQ_IND_INDEX,
5558 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5559 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5560 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5561 		(SQ_IND_INDEX__FORCE_READ_MASK));
5562 	return RREG32(mmSQ_IND_DATA);
5563 }
5564 
5565 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5566 			   uint32_t wave, uint32_t thread,
5567 			   uint32_t regno, uint32_t num, uint32_t *out)
5568 {
5569 	WREG32(mmSQ_IND_INDEX,
5570 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5571 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5572 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5573 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5574 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5575 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5576 	while (num--)
5577 		*(out++) = RREG32(mmSQ_IND_DATA);
5578 }
5579 
5580 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5581 {
5582 	/* type 0 wave data */
5583 	dst[(*no_fields)++] = 0;
5584 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5585 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5586 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5587 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5588 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5589 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5590 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5591 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5592 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5593 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5594 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5595 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5596 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5597 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5598 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5599 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5600 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5601 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5602 }
5603 
5604 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5605 				     uint32_t wave, uint32_t start,
5606 				     uint32_t size, uint32_t *dst)
5607 {
5608 	wave_read_regs(
5609 		adev, simd, wave, 0,
5610 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5611 }
5612 
5613 
5614 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5615 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5616 	.select_se_sh = &gfx_v8_0_select_se_sh,
5617 	.read_wave_data = &gfx_v8_0_read_wave_data,
5618 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5619 };
5620 
5621 static int gfx_v8_0_early_init(void *handle)
5622 {
5623 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5624 
5625 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5626 	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5627 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5628 	gfx_v8_0_set_ring_funcs(adev);
5629 	gfx_v8_0_set_irq_funcs(adev);
5630 	gfx_v8_0_set_gds_init(adev);
5631 	gfx_v8_0_set_rlc_funcs(adev);
5632 
5633 	return 0;
5634 }
5635 
5636 static int gfx_v8_0_late_init(void *handle)
5637 {
5638 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5639 	int r;
5640 
5641 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5642 	if (r)
5643 		return r;
5644 
5645 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5646 	if (r)
5647 		return r;
5648 
5649 	/* requires IBs so do in late init after IB pool is initialized */
5650 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5651 	if (r)
5652 		return r;
5653 
5654 	amdgpu_set_powergating_state(adev,
5655 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5656 
5657 	return 0;
5658 }
5659 
5660 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5661 						       bool enable)
5662 {
5663 	if ((adev->asic_type == CHIP_POLARIS11) ||
5664 	    (adev->asic_type == CHIP_POLARIS12))
5665 		/* Send msg to SMU via Powerplay */
5666 		amdgpu_set_powergating_state(adev,
5667 					     AMD_IP_BLOCK_TYPE_SMC,
5668 					     enable ?
5669 					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5670 
5671 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5672 }
5673 
5674 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5675 							bool enable)
5676 {
5677 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5678 }
5679 
5680 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5681 		bool enable)
5682 {
5683 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5684 }
5685 
5686 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5687 					  bool enable)
5688 {
5689 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5690 }
5691 
5692 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5693 						bool enable)
5694 {
5695 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5696 
5697 	/* Read any GFX register to wake up GFX. */
5698 	if (!enable)
5699 		RREG32(mmDB_RENDER_CONTROL);
5700 }
5701 
5702 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5703 					  bool enable)
5704 {
5705 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5706 		cz_enable_gfx_cg_power_gating(adev, true);
5707 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5708 			cz_enable_gfx_pipeline_power_gating(adev, true);
5709 	} else {
5710 		cz_enable_gfx_cg_power_gating(adev, false);
5711 		cz_enable_gfx_pipeline_power_gating(adev, false);
5712 	}
5713 }
5714 
5715 static int gfx_v8_0_set_powergating_state(void *handle,
5716 					  enum amd_powergating_state state)
5717 {
5718 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5719 	bool enable = (state == AMD_PG_STATE_GATE);
5720 
5721 	if (amdgpu_sriov_vf(adev))
5722 		return 0;
5723 
5724 	switch (adev->asic_type) {
5725 	case CHIP_CARRIZO:
5726 	case CHIP_STONEY:
5727 
5728 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5729 			cz_enable_sck_slow_down_on_power_up(adev, true);
5730 			cz_enable_sck_slow_down_on_power_down(adev, true);
5731 		} else {
5732 			cz_enable_sck_slow_down_on_power_up(adev, false);
5733 			cz_enable_sck_slow_down_on_power_down(adev, false);
5734 		}
5735 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5736 			cz_enable_cp_power_gating(adev, true);
5737 		else
5738 			cz_enable_cp_power_gating(adev, false);
5739 
5740 		cz_update_gfx_cg_power_gating(adev, enable);
5741 
5742 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5743 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5744 		else
5745 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5746 
5747 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5748 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5749 		else
5750 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5751 		break;
5752 	case CHIP_POLARIS11:
5753 	case CHIP_POLARIS12:
5754 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5755 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5756 		else
5757 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5758 
5759 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5760 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5761 		else
5762 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5763 
5764 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5765 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5766 		else
5767 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5768 		break;
5769 	default:
5770 		break;
5771 	}
5772 
5773 	return 0;
5774 }
5775 
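/*
 * Report the clock gating features that are currently active by reading
 * back the override/enable bits from the RLC, CGTS and CP registers.
 */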
5776 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5777 {
5778 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5779 	int data;
5780 
5781 	if (amdgpu_sriov_vf(adev))
5782 		*flags = 0;
5783 
5784 	/* AMD_CG_SUPPORT_GFX_MGCG */
5785 	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5786 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5787 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5788 
5789 	/* AMD_CG_SUPPORT_GFX_CGCG */
5790 	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5791 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5792 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5793 
5794 	/* AMD_CG_SUPPORT_GFX_CGLS */
5795 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5796 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5797 
5798 	/* AMD_CG_SUPPORT_GFX_CGTS */
5799 	data = RREG32(mmCGTS_SM_CTRL_REG);
5800 	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5801 		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5802 
5803 	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5804 	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5805 		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5806 
5807 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5808 	data = RREG32(mmRLC_MEM_SLP_CNTL);
5809 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5810 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5811 
5812 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5813 	data = RREG32(mmCP_MEM_SLP_CNTL);
5814 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5815 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5816 }
5817 
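/*
 * Broadcast a BPM command to the RLC serdes: select all SEs/SHs, target
 * every CU and non-CU master, then program RLC_SERDES_WR_CTRL with the
 * command and the BPM register address.
 */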
5818 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5819 				     uint32_t reg_addr, uint32_t cmd)
5820 {
5821 	uint32_t data;
5822 
5823 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5824 
5825 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5826 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5827 
5828 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5829 	if (adev->asic_type == CHIP_STONEY)
5830 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5831 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5832 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5833 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5834 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5835 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5836 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5837 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5838 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5839 	else
5840 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5841 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5842 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5843 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5844 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5845 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5846 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5847 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5848 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5849 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5850 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5851 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5852 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5853 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5854 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5855 
5856 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5857 }
5858 
5859 #define MSG_ENTER_RLC_SAFE_MODE     1
5860 #define MSG_EXIT_RLC_SAFE_MODE      0
5861 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5862 #define RLC_GPR_REG2__REQ__SHIFT 0
5863 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5864 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5865 
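/*
 * Request RLC safe mode.  If the RLC is running and CGCG/MGCG is enabled,
 * write the enter-safe-mode message to RLC_SAFE_MODE, wait for the GFX
 * clock/power status bits in RLC_GPM_STAT, then wait for the CMD bit to
 * be acknowledged.
 */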
5866 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5867 {
5868 	u32 data;
5869 	unsigned i;
5870 
5871 	data = RREG32(mmRLC_CNTL);
5872 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5873 		return;
5874 
5875 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5876 		data |= RLC_SAFE_MODE__CMD_MASK;
5877 		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5878 		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5879 		WREG32(mmRLC_SAFE_MODE, data);
5880 
5881 		for (i = 0; i < adev->usec_timeout; i++) {
5882 			if ((RREG32(mmRLC_GPM_STAT) &
5883 			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5884 			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5885 			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5886 			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5887 				break;
5888 			udelay(1);
5889 		}
5890 
5891 		for (i = 0; i < adev->usec_timeout; i++) {
5892 			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5893 				break;
5894 			udelay(1);
5895 		}
5896 		adev->gfx.rlc.in_safe_mode = true;
5897 	}
5898 }
5899 
5900 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5901 {
5902 	u32 data;
5903 	unsigned i;
5904 
5905 	data = RREG32(mmRLC_CNTL);
5906 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5907 		return;
5908 
5909 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5910 		if (adev->gfx.rlc.in_safe_mode) {
5911 			data |= RLC_SAFE_MODE__CMD_MASK;
5912 			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5913 			WREG32(mmRLC_SAFE_MODE, data);
5914 			adev->gfx.rlc.in_safe_mode = false;
5915 		}
5916 	}
5917 
5918 	for (i = 0; i < adev->usec_timeout; i++) {
5919 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5920 			break;
5921 		udelay(1);
5922 	}
5923 }
5924 
5925 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5926 	.enter_safe_mode = iceland_enter_rlc_safe_mode,
5927 	.exit_safe_mode = iceland_exit_rlc_safe_mode
5928 };
5929 
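/*
 * Enable or disable medium grain clock gating (MGCG) and the related
 * RLC/CP memory light sleep and CGTS features.  The RLC is kept in safe
 * mode while the override bits and serdes commands are updated.
 */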
5930 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5931 						      bool enable)
5932 {
5933 	uint32_t temp, data;
5934 
5935 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5936 
5937 	/* It is disabled by HW by default */
5938 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5939 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5940 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5941 				/* 1 - RLC memory Light sleep */
5942 				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5943 
5944 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5945 				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5946 		}
5947 
5948 		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5949 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5950 		if (adev->flags & AMD_IS_APU)
5951 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5952 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5953 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5954 		else
5955 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5956 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5957 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5958 				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5959 
5960 		if (temp != data)
5961 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5962 
5963 		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5964 		gfx_v8_0_wait_for_rlc_serdes(adev);
5965 
5966 		/* 5 - clear mgcg override */
5967 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5968 
5969 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5970 			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5971 			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5972 			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5973 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5974 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5975 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5976 			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5977 			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5978 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5979 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5980 			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5981 			if (temp != data)
5982 				WREG32(mmCGTS_SM_CTRL_REG, data);
5983 		}
5984 		udelay(50);
5985 
5986 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5987 		gfx_v8_0_wait_for_rlc_serdes(adev);
5988 	} else {
5989 		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5990 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5991 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5992 				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5993 				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5994 				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5995 		if (temp != data)
5996 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5997 
5998 		/* 2 - disable MGLS in RLC */
5999 		data = RREG32(mmRLC_MEM_SLP_CNTL);
6000 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
6001 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
6002 			WREG32(mmRLC_MEM_SLP_CNTL, data);
6003 		}
6004 
6005 		/* 3 - disable MGLS in CP */
6006 		data = RREG32(mmCP_MEM_SLP_CNTL);
6007 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
6008 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
6009 			WREG32(mmCP_MEM_SLP_CNTL, data);
6010 		}
6011 
6012 		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
6013 		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6014 		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
6015 				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
6016 		if (temp != data)
6017 			WREG32(mmCGTS_SM_CTRL_REG, data);
6018 
6019 		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6020 		gfx_v8_0_wait_for_rlc_serdes(adev);
6021 
6022 		/* 6 - set mgcg override */
6023 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6024 
6025 		udelay(50);
6026 
6027 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6028 		gfx_v8_0_wait_for_rlc_serdes(adev);
6029 	}
6030 
6031 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
6032 }
6033 
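/*
 * Enable or disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS): update the CGCG/CGLS override bits via serdes
 * commands and program RLC_CGCG_CGLS_CTRL to match.
 */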
6034 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
6035 						      bool enable)
6036 {
6037 	uint32_t temp, temp1, data, data1;
6038 
6039 	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
6040 
6041 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
6042 
6043 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
6044 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6045 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
6046 		if (temp1 != data1)
6047 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6048 
6049 		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6050 		gfx_v8_0_wait_for_rlc_serdes(adev);
6051 
6052 		/* 2 - clear cgcg override */
6053 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6054 
6055 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6056 		gfx_v8_0_wait_for_rlc_serdes(adev);
6057 
6058 		/* 3 - write cmd to set CGLS */
6059 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6060 
6061 		/* 4 - enable cgcg */
6062 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6063 
6064 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6065 			/* enable cgls*/
6066 			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6067 
6068 			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6069 			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6070 
6071 			if (temp1 != data1)
6072 				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6073 		} else {
6074 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6075 		}
6076 
6077 		if (temp != data)
6078 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6079 
6080 		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
6081 		 * Cmp_busy/GFX_Idle interrupts
6082 		 */
6083 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6084 	} else {
6085 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
6086 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
6087 
6088 		/* TEST CGCG */
6089 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6090 		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6091 				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6092 		if (temp1 != data1)
6093 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6094 
6095 		/* read gfx register to wake up cgcg */
6096 		RREG32(mmCB_CGTT_SCLK_CTRL);
6097 		RREG32(mmCB_CGTT_SCLK_CTRL);
6098 		RREG32(mmCB_CGTT_SCLK_CTRL);
6099 		RREG32(mmCB_CGTT_SCLK_CTRL);
6100 
6101 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6102 		gfx_v8_0_wait_for_rlc_serdes(adev);
6103 
6104 		/* write cmd to Set CGCG Override */
6105 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6106 
6107 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6108 		gfx_v8_0_wait_for_rlc_serdes(adev);
6109 
6110 		/* write cmd to Clear CGLS */
6111 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6112 
6113 		/* disable cgcg, cgls should be disabled too. */
6114 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6115 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6116 		if (temp != data)
6117 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6118 		/* enable interrupts again for PG */
6119 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6120 	}
6121 
6122 	gfx_v8_0_wait_for_rlc_serdes(adev);
6123 
6124 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
6125 }

6126 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6127 					    bool enable)
6128 {
6129 	if (enable) {
6130 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6131 		 * ===  MGCG + MGLS + TS(CG/LS) ===
6132 		 */
6133 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6134 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6135 	} else {
6136 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6137 		 * ===  CGCG + CGLS ===
6138 		 */
6139 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6140 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6141 	}
6142 	return 0;
6143 }
6144 
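/*
 * On Tonga the SMU owns the clock gating configuration: translate the
 * requested state into PP_CG_MSG_ID messages for the GFX_CG and GFX_MG
 * blocks and hand them to powerplay via amd_set_clockgating_by_smu().
 */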
6145 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6146 					  enum amd_clockgating_state state)
6147 {
6148 	uint32_t msg_id, pp_state = 0;
6149 	uint32_t pp_support_state = 0;
6150 	void *pp_handle = adev->powerplay.pp_handle;
6151 
6152 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6153 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6154 			pp_support_state = PP_STATE_SUPPORT_LS;
6155 			pp_state = PP_STATE_LS;
6156 		}
6157 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6158 			pp_support_state |= PP_STATE_SUPPORT_CG;
6159 			pp_state |= PP_STATE_CG;
6160 		}
6161 		if (state == AMD_CG_STATE_UNGATE)
6162 			pp_state = 0;
6163 
6164 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6165 				PP_BLOCK_GFX_CG,
6166 				pp_support_state,
6167 				pp_state);
6168 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6169 	}
6170 
6171 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6172 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6173 			pp_support_state = PP_STATE_SUPPORT_LS;
6174 			pp_state = PP_STATE_LS;
6175 		}
6176 
6177 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6178 			pp_support_state |= PP_STATE_SUPPORT_CG;
6179 			pp_state |= PP_STATE_CG;
6180 		}
6181 
6182 		if (state == AMD_CG_STATE_UNGATE)
6183 			pp_state = 0;
6184 
6185 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6186 				PP_BLOCK_GFX_MG,
6187 				pp_support_state,
6188 				pp_state);
6189 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6190 	}
6191 
6192 	return 0;
6193 }
6194 
6195 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6196 					  enum amd_clockgating_state state)
6197 {
6199 	uint32_t msg_id, pp_state = 0;
6200 	uint32_t pp_support_state = 0;
6201 	void *pp_handle = adev->powerplay.pp_handle;
6202 
6203 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6204 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6205 			pp_support_state = PP_STATE_SUPPORT_LS;
6206 			pp_state = PP_STATE_LS;
6207 		}
6208 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6209 			pp_support_state |= PP_STATE_SUPPORT_CG;
6210 			pp_state |= PP_STATE_CG;
6211 		}
6212 		if (state == AMD_CG_STATE_UNGATE)
6213 			pp_state = 0;
6214 
6215 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6216 				PP_BLOCK_GFX_CG,
6217 				pp_support_state,
6218 				pp_state);
6219 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6220 	}
6221 
6222 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6223 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6224 			pp_support_state = PP_STATE_SUPPORT_LS;
6225 			pp_state = PP_STATE_LS;
6226 		}
6227 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6228 			pp_support_state |= PP_STATE_SUPPORT_CG;
6229 			pp_state |= PP_STATE_CG;
6230 		}
6231 		if (state == AMD_CG_STATE_UNGATE)
6232 			pp_state = 0;
6233 
6234 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6235 				PP_BLOCK_GFX_3D,
6236 				pp_support_state,
6237 				pp_state);
6238 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6239 	}
6240 
6241 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6242 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6243 			pp_support_state = PP_STATE_SUPPORT_LS;
6244 			pp_state = PP_STATE_LS;
6245 		}
6246 
6247 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6248 			pp_support_state |= PP_STATE_SUPPORT_CG;
6249 			pp_state |= PP_STATE_CG;
6250 		}
6251 
6252 		if (state == AMD_CG_STATE_UNGATE)
6253 			pp_state = 0;
6254 
6255 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6256 				PP_BLOCK_GFX_MG,
6257 				pp_support_state,
6258 				pp_state);
6259 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6260 	}
6261 
6262 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6263 		pp_support_state = PP_STATE_SUPPORT_LS;
6264 
6265 		if (state == AMD_CG_STATE_UNGATE)
6266 			pp_state = 0;
6267 		else
6268 			pp_state = PP_STATE_LS;
6269 
6270 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6271 				PP_BLOCK_GFX_RLC,
6272 				pp_support_state,
6273 				pp_state);
6274 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6275 	}
6276 
6277 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6278 		pp_support_state = PP_STATE_SUPPORT_LS;
6279 
6280 		if (state == AMD_CG_STATE_UNGATE)
6281 			pp_state = 0;
6282 		else
6283 			pp_state = PP_STATE_LS;
6284 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6285 			PP_BLOCK_GFX_CP,
6286 			pp_support_state,
6287 			pp_state);
6288 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6289 	}
6290 
6291 	return 0;
6292 }
6293 
6294 static int gfx_v8_0_set_clockgating_state(void *handle,
6295 					  enum amd_clockgating_state state)
6296 {
6297 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6298 
6299 	if (amdgpu_sriov_vf(adev))
6300 		return 0;
6301 
6302 	switch (adev->asic_type) {
6303 	case CHIP_FIJI:
6304 	case CHIP_CARRIZO:
6305 	case CHIP_STONEY:
6306 		gfx_v8_0_update_gfx_clock_gating(adev,
6307 						 state == AMD_CG_STATE_GATE);
6308 		break;
6309 	case CHIP_TONGA:
6310 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6311 		break;
6312 	case CHIP_POLARIS10:
6313 	case CHIP_POLARIS11:
6314 	case CHIP_POLARIS12:
6315 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6316 		break;
6317 	default:
6318 		break;
6319 	}
6320 	return 0;
6321 }
6322 
6323 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6324 {
6325 	return ring->adev->wb.wb[ring->rptr_offs];
6326 }
6327 
6328 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6329 {
6330 	struct amdgpu_device *adev = ring->adev;
6331 
6332 	if (ring->use_doorbell)
6333 		/* XXX check if swapping is necessary on BE */
6334 		return ring->adev->wb.wb[ring->wptr_offs];
6335 	else
6336 		return RREG32(mmCP_RB0_WPTR);
6337 }
6338 
6339 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6340 {
6341 	struct amdgpu_device *adev = ring->adev;
6342 
6343 	if (ring->use_doorbell) {
6344 		/* XXX check if swapping is necessary on BE */
6345 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6346 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6347 	} else {
6348 		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6349 		(void)RREG32(mmCP_RB0_WPTR);
6350 	}
6351 }
6352 
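/*
 * Emit an HDP flush: pick the GPU_HDP_FLUSH_DONE reference bit for this
 * ring's CP/MEC pipe, then issue a WAIT_REG_MEM that writes
 * GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until it matches.
 */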
6353 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6354 {
6355 	u32 ref_and_mask, reg_mem_engine;
6356 
6357 	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6358 	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6359 		switch (ring->me) {
6360 		case 1:
6361 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6362 			break;
6363 		case 2:
6364 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6365 			break;
6366 		default:
6367 			return;
6368 		}
6369 		reg_mem_engine = 0;
6370 	} else {
6371 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6372 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6373 	}
6374 
6375 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6376 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6377 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6378 				 reg_mem_engine));
6379 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6380 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6381 	amdgpu_ring_write(ring, ref_and_mask);
6382 	amdgpu_ring_write(ring, ref_and_mask);
6383 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6384 }
6385 
6386 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6387 {
6388 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6389 	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6390 		EVENT_INDEX(4));
6391 
6392 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6393 	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6394 		EVENT_INDEX(0));
6395 }
6396 
6398 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6399 {
6400 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6401 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6402 				 WRITE_DATA_DST_SEL(0) |
6403 				 WR_CONFIRM));
6404 	amdgpu_ring_write(ring, mmHDP_DEBUG0);
6405 	amdgpu_ring_write(ring, 0);
6406 	amdgpu_ring_write(ring, 1);
6408 }
6409 
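/*
 * Emit an indirect buffer on the gfx ring.  CE IBs use the
 * INDIRECT_BUFFER_CONST packet; the control word encodes the IB length
 * and VMID, and preemptible DE IBs under SR-IOV also emit DE metadata.
 */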
6410 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6411 				      struct amdgpu_ib *ib,
6412 				      unsigned vm_id, bool ctx_switch)
6413 {
6414 	u32 header, control = 0;
6415 
6416 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6417 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6418 	else
6419 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6420 
6421 	control |= ib->length_dw | (vm_id << 24);
6422 
6423 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6424 		control |= INDIRECT_BUFFER_PRE_ENB(1);
6425 
6426 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6427 			gfx_v8_0_ring_emit_de_meta(ring);
6428 	}
6429 
6430 	amdgpu_ring_write(ring, header);
6431 	amdgpu_ring_write(ring,
6432 #ifdef __BIG_ENDIAN
6433 			  (2 << 0) |
6434 #endif
6435 			  (ib->gpu_addr & 0xFFFFFFFC));
6436 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6437 	amdgpu_ring_write(ring, control);
6438 }
6439 
6440 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6441 					  struct amdgpu_ib *ib,
6442 					  unsigned vm_id, bool ctx_switch)
6443 {
6444 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6445 
6446 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6447 	amdgpu_ring_write(ring,
6448 #ifdef __BIG_ENDIAN
6449 				(2 << 0) |
6450 #endif
6451 				(ib->gpu_addr & 0xFFFFFFFC));
6452 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6453 	amdgpu_ring_write(ring, control);
6454 }
6455 
6456 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6457 					 u64 seq, unsigned flags)
6458 {
6459 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6460 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6461 
6462 	/* EVENT_WRITE_EOP - flush caches, send int */
6463 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6464 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6465 				 EOP_TC_ACTION_EN |
6466 				 EOP_TC_WB_ACTION_EN |
6467 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6468 				 EVENT_INDEX(5)));
6469 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6470 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6471 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6472 	amdgpu_ring_write(ring, lower_32_bits(seq));
6473 	amdgpu_ring_write(ring, upper_32_bits(seq));
6475 }
6476 
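/*
 * Emit a pipeline sync: wait (on the PFP for gfx rings, the ME for
 * compute) until this ring's fence memory reaches the last synced
 * sequence number.
 */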
6477 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6478 {
6479 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6480 	uint32_t seq = ring->fence_drv.sync_seq;
6481 	uint64_t addr = ring->fence_drv.gpu_addr;
6482 
6483 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6484 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6485 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6486 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6487 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6488 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6489 	amdgpu_ring_write(ring, seq);
6490 	amdgpu_ring_write(ring, 0xffffffff);
6491 	amdgpu_ring_write(ring, 4); /* poll interval */
6492 }
6493 
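/*
 * Emit a VM flush: write the page directory base for this VMID (contexts
 * 0-7 and 8-15 live in different register ranges), request a TLB
 * invalidate for the VMID and wait for it to complete.
 */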
6494 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6495 					unsigned vm_id, uint64_t pd_addr)
6496 {
6497 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6498 
6499 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6500 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6501 				 WRITE_DATA_DST_SEL(0)) |
6502 				 WR_CONFIRM);
6503 	if (vm_id < 8) {
6504 		amdgpu_ring_write(ring,
6505 				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6506 	} else {
6507 		amdgpu_ring_write(ring,
6508 				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6509 	}
6510 	amdgpu_ring_write(ring, 0);
6511 	amdgpu_ring_write(ring, pd_addr >> 12);
6512 
6513 	/* bits 0-15 are the VM contexts0-15 */
6514 	/* invalidate the cache */
6515 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6516 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6517 				 WRITE_DATA_DST_SEL(0)));
6518 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6519 	amdgpu_ring_write(ring, 0);
6520 	amdgpu_ring_write(ring, 1 << vm_id);
6521 
6522 	/* wait for the invalidate to complete */
6523 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6524 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6525 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6526 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6527 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6528 	amdgpu_ring_write(ring, 0);
6529 	amdgpu_ring_write(ring, 0); /* ref */
6530 	amdgpu_ring_write(ring, 0); /* mask */
6531 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6532 
6533 	/* compute doesn't have PFP */
6534 	if (usepfp) {
6535 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6536 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6537 		amdgpu_ring_write(ring, 0x0);
6538 	}
6539 }
6540 
6541 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6542 {
6543 	return ring->adev->wb.wb[ring->wptr_offs];
6544 }
6545 
6546 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6547 {
6548 	struct amdgpu_device *adev = ring->adev;
6549 
6550 	/* XXX check if swapping is necessary on BE */
6551 	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6552 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6553 }
6554 
6555 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6556 					     u64 addr, u64 seq,
6557 					     unsigned flags)
6558 {
6559 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6560 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6561 
6562 	/* RELEASE_MEM - flush caches, send int */
6563 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6564 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6565 				 EOP_TC_ACTION_EN |
6566 				 EOP_TC_WB_ACTION_EN |
6567 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6568 				 EVENT_INDEX(5)));
6569 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6570 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6571 	amdgpu_ring_write(ring, upper_32_bits(addr));
6572 	amdgpu_ring_write(ring, lower_32_bits(seq));
6573 	amdgpu_ring_write(ring, upper_32_bits(seq));
6574 }
6575 
6576 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6577 					 u64 seq, unsigned int flags)
6578 {
6579 	/* we only allocate 32bit for each seq wb address */
6580 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6581 
6582 	/* write fence seq to the "addr" */
6583 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6584 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6585 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6586 	amdgpu_ring_write(ring, lower_32_bits(addr));
6587 	amdgpu_ring_write(ring, upper_32_bits(addr));
6588 	amdgpu_ring_write(ring, lower_32_bits(seq));
6589 
6590 	if (flags & AMDGPU_FENCE_FLAG_INT) {
6591 		/* set register to trigger INT */
6592 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6593 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6594 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6595 		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6596 		amdgpu_ring_write(ring, 0);
6597 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6598 	}
6599 }
6600 
6601 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6602 {
6603 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6604 	amdgpu_ring_write(ring, 0);
6605 }
6606 
6607 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6608 {
6609 	uint32_t dw2 = 0;
6610 
6611 	if (amdgpu_sriov_vf(ring->adev))
6612 		gfx_v8_0_ring_emit_ce_meta(ring);
6613 
6614 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6615 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6616 		gfx_v8_0_ring_emit_vgt_flush(ring);
6617 		/* set load_global_config & load_global_uconfig */
6618 		dw2 |= 0x8001;
6619 		/* set load_cs_sh_regs */
6620 		dw2 |= 0x01000000;
6621 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6622 		dw2 |= 0x10002;
6623 
6624 		/* set load_ce_ram if preamble is presented */
6625 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6626 			dw2 |= 0x10000000;
6627 	} else {
6628 		/* still load_ce_ram if this is the first time a preamble is
6629 		 * presented, even though no context switch happens.
6630 		 */
6631 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6632 			dw2 |= 0x10000000;
6633 	}
6634 
6635 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6636 	amdgpu_ring_write(ring, dw2);
6637 	amdgpu_ring_write(ring, 0);
6638 }
6639 
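/*
 * Emit a COND_EXEC packet whose DW count is not yet known: write the
 * dummy value 0x55aa55aa and return its ring offset so the caller can
 * patch it once the size of the conditional block is known.
 */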
6640 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6641 {
6642 	unsigned ret;
6643 
6644 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6645 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6646 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6647 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6648 	ret = ring->wptr & ring->buf_mask;
6649 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6650 	return ret;
6651 }
6652 
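/*
 * Patch the COND_EXEC placeholder emitted by init_cond_exec with the
 * actual number of DWs to skip, accounting for ring buffer wrap-around.
 */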
6653 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6654 {
6655 	unsigned cur;
6656 
6657 	BUG_ON(offset > ring->buf_mask);
6658 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6659 
6660 	cur = (ring->wptr & ring->buf_mask) - 1;
6661 	if (likely(cur > offset))
6662 		ring->ring[offset] = cur - offset;
6663 	else
6664 		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6665 }
6666 
6667 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6668 {
6669 	struct amdgpu_device *adev = ring->adev;
6670 
6671 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6672 	amdgpu_ring_write(ring, 0 |	/* src: register */
6673 				(5 << 8) |	/* dst: memory */
6674 				(1 << 20));	/* write confirm */
6675 	amdgpu_ring_write(ring, reg);
6676 	amdgpu_ring_write(ring, 0);
6677 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6678 				adev->virt.reg_val_offs * 4));
6679 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6680 				adev->virt.reg_val_offs * 4));
6681 }
6682 
6683 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6684 				  uint32_t val)
6685 {
6686 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6687 	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6688 	amdgpu_ring_write(ring, reg);
6689 	amdgpu_ring_write(ring, 0);
6690 	amdgpu_ring_write(ring, val);
6691 }
6692 
6693 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6694 						 enum amdgpu_interrupt_state state)
6695 {
6696 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6697 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6698 }
6699 
6700 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6701 						     int me, int pipe,
6702 						     enum amdgpu_interrupt_state state)
6703 {
6704 	/*
6705 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6706 	 * handles the setting of interrupts for this specific pipe. All other
6707 	 * pipes' interrupts are set by amdkfd.
6708 	 */
6709 
6710 	if (me == 1) {
6711 		switch (pipe) {
6712 		case 0:
6713 			break;
6714 		default:
6715 			DRM_DEBUG("invalid pipe %d\n", pipe);
6716 			return;
6717 		}
6718 	} else {
6719 		DRM_DEBUG("invalid me %d\n", me);
6720 		return;
6721 	}
6722 
6723 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6724 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6725 }
6726 
6727 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6728 					     struct amdgpu_irq_src *source,
6729 					     unsigned type,
6730 					     enum amdgpu_interrupt_state state)
6731 {
6732 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6733 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6734 
6735 	return 0;
6736 }
6737 
6738 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6739 					      struct amdgpu_irq_src *source,
6740 					      unsigned type,
6741 					      enum amdgpu_interrupt_state state)
6742 {
6743 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6744 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6745 
6746 	return 0;
6747 }
6748 
6749 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6750 					    struct amdgpu_irq_src *src,
6751 					    unsigned type,
6752 					    enum amdgpu_interrupt_state state)
6753 {
6754 	switch (type) {
6755 	case AMDGPU_CP_IRQ_GFX_EOP:
6756 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6757 		break;
6758 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6759 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6760 		break;
6761 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6762 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6763 		break;
6764 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6765 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6766 		break;
6767 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6768 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6769 		break;
6770 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6771 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6772 		break;
6773 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6774 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6775 		break;
6776 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6777 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6778 		break;
6779 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6780 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6781 		break;
6782 	default:
6783 		break;
6784 	}
6785 	return 0;
6786 }
6787 
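/*
 * CP EOP interrupt handler.  The IV ring_id encodes the pipe in bits
 * [1:0], the ME in bits [3:2] and the queue in bits [6:4]; ME 0 is the
 * gfx ring, ME 1/2 are the compute MECs.
 */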
6788 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6789 			    struct amdgpu_irq_src *source,
6790 			    struct amdgpu_iv_entry *entry)
6791 {
6792 	int i;
6793 	u8 me_id, pipe_id, queue_id;
6794 	struct amdgpu_ring *ring;
6795 
6796 	DRM_DEBUG("IH: CP EOP\n");
6797 	me_id = (entry->ring_id & 0x0c) >> 2;
6798 	pipe_id = (entry->ring_id & 0x03) >> 0;
6799 	queue_id = (entry->ring_id & 0x70) >> 4;
6800 
6801 	switch (me_id) {
6802 	case 0:
6803 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6804 		break;
6805 	case 1:
6806 	case 2:
6807 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6808 			ring = &adev->gfx.compute_ring[i];
6809 			/* Per-queue interrupt is supported for MEC starting from VI.
6810 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6811 			 */
6812 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6813 				amdgpu_fence_process(ring);
6814 		}
6815 		break;
6816 	}
6817 	return 0;
6818 }
6819 
6820 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6821 				 struct amdgpu_irq_src *source,
6822 				 struct amdgpu_iv_entry *entry)
6823 {
6824 	DRM_ERROR("Illegal register access in command stream\n");
6825 	schedule_work(&adev->reset_work);
6826 	return 0;
6827 }
6828 
6829 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6830 				  struct amdgpu_irq_src *source,
6831 				  struct amdgpu_iv_entry *entry)
6832 {
6833 	DRM_ERROR("Illegal instruction in command stream\n");
6834 	schedule_work(&adev->reset_work);
6835 	return 0;
6836 }
6837 
6838 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6839 					    struct amdgpu_irq_src *src,
6840 					    unsigned int type,
6841 					    enum amdgpu_interrupt_state state)
6842 {
6843 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6844 
6845 	switch (type) {
6846 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6847 		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6848 			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6849 		if (ring->me == 1)
6850 			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6851 				     ring->pipe,
6852 				     GENERIC2_INT_ENABLE,
6853 				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6854 		else
6855 			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6856 				     ring->pipe,
6857 				     GENERIC2_INT_ENABLE,
6858 				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6859 		break;
6860 	default:
6861 		BUG(); /* kiq only support GENERIC2_INT now */
6862 		BUG(); /* kiq only supports GENERIC2_INT now */
6863 	}
6864 	return 0;
6865 }
6866 
6867 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6868 			    struct amdgpu_irq_src *source,
6869 			    struct amdgpu_iv_entry *entry)
6870 {
6871 	u8 me_id, pipe_id, queue_id;
6872 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6873 
6874 	me_id = (entry->ring_id & 0x0c) >> 2;
6875 	pipe_id = (entry->ring_id & 0x03) >> 0;
6876 	queue_id = (entry->ring_id & 0x70) >> 4;
6877 	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6878 		   me_id, pipe_id, queue_id);
6879 
6880 	amdgpu_fence_process(ring);
6881 	return 0;
6882 }
6883 
6884 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6885 	.name = "gfx_v8_0",
6886 	.early_init = gfx_v8_0_early_init,
6887 	.late_init = gfx_v8_0_late_init,
6888 	.sw_init = gfx_v8_0_sw_init,
6889 	.sw_fini = gfx_v8_0_sw_fini,
6890 	.hw_init = gfx_v8_0_hw_init,
6891 	.hw_fini = gfx_v8_0_hw_fini,
6892 	.suspend = gfx_v8_0_suspend,
6893 	.resume = gfx_v8_0_resume,
6894 	.is_idle = gfx_v8_0_is_idle,
6895 	.wait_for_idle = gfx_v8_0_wait_for_idle,
6896 	.check_soft_reset = gfx_v8_0_check_soft_reset,
6897 	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6898 	.soft_reset = gfx_v8_0_soft_reset,
6899 	.post_soft_reset = gfx_v8_0_post_soft_reset,
6900 	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6901 	.set_powergating_state = gfx_v8_0_set_powergating_state,
6902 	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6903 };
6904 
6905 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6906 	.type = AMDGPU_RING_TYPE_GFX,
6907 	.align_mask = 0xff,
6908 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6909 	.support_64bit_ptrs = false,
6910 	.get_rptr = gfx_v8_0_ring_get_rptr,
6911 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6912 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6913 	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
6914 		5 +  /* COND_EXEC */
6915 		7 +  /* PIPELINE_SYNC */
6916 		19 + /* VM_FLUSH */
6917 		8 +  /* FENCE for VM_FLUSH */
6918 		20 + /* GDS switch */
6919 		4 + /* double SWITCH_BUFFER,
6920 		       the first COND_EXEC jumps to the place just
6921 		       prior to this double SWITCH_BUFFER */
6922 		5 + /* COND_EXEC */
6923 		7 +  /* HDP_flush */
6924 		4 +  /* VGT_flush */
6925 		14 + /* CE_META */
6926 		31 + /* DE_META */
6927 		3 + /* CNTX_CTRL */
6928 		5 + /* HDP_INVL */
6929 		8 + 8 + /* FENCE x2 */
6930 		2, /* SWITCH_BUFFER */
6931 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6932 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6933 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6934 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6935 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6936 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6937 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6938 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6939 	.test_ring = gfx_v8_0_ring_test_ring,
6940 	.test_ib = gfx_v8_0_ring_test_ib,
6941 	.insert_nop = amdgpu_ring_insert_nop,
6942 	.pad_ib = amdgpu_ring_generic_pad_ib,
6943 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6944 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6945 	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6946 	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6947 };
6948 
6949 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6950 	.type = AMDGPU_RING_TYPE_COMPUTE,
6951 	.align_mask = 0xff,
6952 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6953 	.support_64bit_ptrs = false,
6954 	.get_rptr = gfx_v8_0_ring_get_rptr,
6955 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6956 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6957 	.emit_frame_size =
6958 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6959 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6960 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6961 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6962 		17 + /* gfx_v8_0_ring_emit_vm_flush */
6963 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6964 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
6965 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6966 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6967 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6968 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6969 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6970 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6971 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6972 	.test_ring = gfx_v8_0_ring_test_ring,
6973 	.test_ib = gfx_v8_0_ring_test_ib,
6974 	.insert_nop = amdgpu_ring_insert_nop,
6975 	.pad_ib = amdgpu_ring_generic_pad_ib,
6976 };
6977 
6978 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6979 	.type = AMDGPU_RING_TYPE_KIQ,
6980 	.align_mask = 0xff,
6981 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6982 	.support_64bit_ptrs = false,
6983 	.get_rptr = gfx_v8_0_ring_get_rptr,
6984 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6985 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6986 	.emit_frame_size =
6987 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6988 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6989 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6990 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6991 		17 + /* gfx_v8_0_ring_emit_vm_flush */
6992 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6993 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
6994 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6995 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6996 	.test_ring = gfx_v8_0_ring_test_ring,
6997 	.test_ib = gfx_v8_0_ring_test_ib,
6998 	.insert_nop = amdgpu_ring_insert_nop,
6999 	.pad_ib = amdgpu_ring_generic_pad_ib,
7000 	.emit_rreg = gfx_v8_0_ring_emit_rreg,
7001 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7002 };
7003 
7004 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7005 {
7006 	int i;
7007 
7008 	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7009 
7010 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7011 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7012 
7013 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7014 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7015 }
7016 
7017 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7018 	.set = gfx_v8_0_set_eop_interrupt_state,
7019 	.process = gfx_v8_0_eop_irq,
7020 };
7021 
7022 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7023 	.set = gfx_v8_0_set_priv_reg_fault_state,
7024 	.process = gfx_v8_0_priv_reg_irq,
7025 };
7026 
7027 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7028 	.set = gfx_v8_0_set_priv_inst_fault_state,
7029 	.process = gfx_v8_0_priv_inst_irq,
7030 };
7031 
7032 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7033 	.set = gfx_v8_0_kiq_set_interrupt_state,
7034 	.process = gfx_v8_0_kiq_irq,
7035 };
7036 
7037 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7038 {
7039 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7040 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7041 
7042 	adev->gfx.priv_reg_irq.num_types = 1;
7043 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7044 
7045 	adev->gfx.priv_inst_irq.num_types = 1;
7046 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7047 
7048 	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7049 	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7050 }
7051 
7052 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7053 {
7054 	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7055 }
7056 
7057 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7058 {
7059 	/* init ASIC GDS info */
7060 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7061 	adev->gds.gws.total_size = 64;
7062 	adev->gds.oa.total_size = 16;
7063 
7064 	if (adev->gds.mem.total_size == 64 * 1024) {
7065 		adev->gds.mem.gfx_partition_size = 4096;
7066 		adev->gds.mem.cs_partition_size = 4096;
7067 
7068 		adev->gds.gws.gfx_partition_size = 4;
7069 		adev->gds.gws.cs_partition_size = 4;
7070 
7071 		adev->gds.oa.gfx_partition_size = 4;
7072 		adev->gds.oa.cs_partition_size = 1;
7073 	} else {
7074 		adev->gds.mem.gfx_partition_size = 1024;
7075 		adev->gds.mem.cs_partition_size = 1024;
7076 
7077 		adev->gds.gws.gfx_partition_size = 16;
7078 		adev->gds.gws.cs_partition_size = 16;
7079 
7080 		adev->gds.oa.gfx_partition_size = 4;
7081 		adev->gds.oa.cs_partition_size = 4;
7082 	}
7083 }
7084 
7085 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7086 						 u32 bitmap)
7087 {
7088 	u32 data;
7089 
7090 	if (!bitmap)
7091 		return;
7092 
7093 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7094 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7095 
7096 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7097 }
7098 
7099 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7100 {
7101 	u32 data, mask;
7102 
7103 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7104 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7105 
7106 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7107 
7108 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7109 }
7110 
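/*
 * Walk every shader engine/array, apply any user-requested CU disable
 * masks, and record the per-SE/SH active CU bitmaps plus the total CU
 * count and the always-on CU mask.
 */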
7111 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7112 {
7113 	int i, j, k, counter, active_cu_number = 0;
7114 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7115 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7116 	unsigned disable_masks[4 * 2];
7117 	u32 ao_cu_num;
7118 
7119 	memset(cu_info, 0, sizeof(*cu_info));
7120 
7121 	if (adev->flags & AMD_IS_APU)
7122 		ao_cu_num = 2;
7123 	else
7124 		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7125 
7126 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7127 
7128 	mutex_lock(&adev->grbm_idx_mutex);
7129 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7130 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7131 			mask = 1;
7132 			ao_bitmap = 0;
7133 			counter = 0;
7134 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7135 			if (i < 4 && j < 2)
7136 				gfx_v8_0_set_user_cu_inactive_bitmap(
7137 					adev, disable_masks[i * 2 + j]);
7138 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7139 			cu_info->bitmap[i][j] = bitmap;
7140 
7141 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7142 				if (bitmap & mask) {
7143 					if (counter < ao_cu_num)
7144 						ao_bitmap |= mask;
7145 					counter++;
7146 				}
7147 				mask <<= 1;
7148 			}
7149 			active_cu_number += counter;
7150 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7151 		}
7152 	}
7153 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7154 	mutex_unlock(&adev->grbm_idx_mutex);
7155 
7156 	cu_info->number = active_cu_number;
7157 	cu_info->ao_cu_mask = ao_cu_mask;
7158 }
7159 
7160 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7161 {
7162 	.type = AMD_IP_BLOCK_TYPE_GFX,
7163 	.major = 8,
7164 	.minor = 0,
7165 	.rev = 0,
7166 	.funcs = &gfx_v8_0_ip_funcs,
7167 };
7168 
7169 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7170 {
7171 	.type = AMD_IP_BLOCK_TYPE_GFX,
7172 	.major = 8,
7173 	.minor = 1,
7174 	.rev = 0,
7175 	.funcs = &gfx_v8_0_ip_funcs,
7176 };
7177 
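/*
 * Write the (cleared) CE metadata into the CSA when running under
 * SR-IOV; the payload layout depends on whether chained IBs are
 * supported.
 */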
7178 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7179 {
7180 	uint64_t ce_payload_addr;
7181 	int cnt_ce;
7182 	static union {
7183 		struct vi_ce_ib_state regular;
7184 		struct vi_ce_ib_state_chained_ib chained;
7185 	} ce_payload = {};
7186 
7187 	if (ring->adev->virt.chained_ib_support) {
7188 		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7189 						  offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7190 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7191 	} else {
7192 		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7193 						  offsetof(struct vi_gfx_meta_data, ce_payload);
7194 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7195 	}
7196 
7197 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7198 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7199 				WRITE_DATA_DST_SEL(8) |
7200 				WR_CONFIRM) |
7201 				WRITE_DATA_CACHE_POLICY(0));
7202 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7203 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7204 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7205 }
7206 
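/*
 * Write the DE metadata into the CSA, pointing the GDS backup at the
 * page following the CSA; emitted for preemptible IBs under SR-IOV.
 */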
7207 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7208 {
7209 	uint64_t de_payload_addr, gds_addr, csa_addr;
7210 	int cnt_de;
7211 	static union {
7212 		struct vi_de_ib_state regular;
7213 		struct vi_de_ib_state_chained_ib chained;
7214 	} de_payload = {};
7215 
7216 	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
7217 	gds_addr = csa_addr + 4096;
7218 	if (ring->adev->virt.chained_ib_support) {
7219 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7220 		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7221 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7222 		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7223 	} else {
7224 		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7225 		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7226 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7227 		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7228 	}
7229 
7230 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7231 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7232 				WRITE_DATA_DST_SEL(8) |
7233 				WR_CONFIRM) |
7234 				WRITE_DATA_CACHE_POLICY(0));
7235 	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7236 	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7237 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7238 }
7239 
7240 /* create MQD for each compute queue */
7241 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
7242 {
7243 	struct amdgpu_ring *ring = NULL;
7244 	int r, i;
7245 
7246 	/* create MQD for KIQ */
7247 	ring = &adev->gfx.kiq.ring;
7248 	if (!ring->mqd_obj) {
7249 		r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7250 					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7251 					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
7252 		if (r) {
7253 			dev_warn(adev->dev, "failed to create ring mqd obj (%d)", r);
7254 			return r;
7255 		}
7256 
7257 		/* prepare MQD backup */
7258 		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7259 		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
7260 			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7261 	}
7262 
7263 	/* create MQD for each KCQ */
7264 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7265 		ring = &adev->gfx.compute_ring[i];
7266 		if (!ring->mqd_obj) {
7267 			r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7268 						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7269 						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
7270 			if (r) {
7271 				dev_warn(adev->dev, "failed to create ring mqd obj (%d)", r);
7272 				return r;
7273 			}
7274 
7275 			/* prepare MQD backup */
7276 			adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7277 			if (!adev->gfx.mec.mqd_backup[i])
7278 				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7279 		}
7280 	}
7281 
7282 	return 0;
7283 }
7284 
7285 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
7286 {
7287 	struct amdgpu_ring *ring = NULL;
7288 	int i;
7289 
7290 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7291 		ring = &adev->gfx.compute_ring[i];
7292 		kfree(adev->gfx.mec.mqd_backup[i]);
7293 		amdgpu_bo_free_kernel(&ring->mqd_obj,
7294 				      &ring->mqd_gpu_addr,
7295 				      &ring->mqd_ptr);
7296 	}
7297 
7298 	ring = &adev->gfx.kiq.ring;
7299 	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
7300 	amdgpu_bo_free_kernel(&ring->mqd_obj,
7301 			      &ring->mqd_gpu_addr,
7302 			      &ring->mqd_ptr);
7303 }
7304