/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

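/*
 * Firmware images consumed by this IP block.  MODULE_FIRMWARE() records
 * each file name in the module info so that userspace tooling (e.g.
 * initramfs generators) can bundle the right binaries.
 */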
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

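/* GDS base/size registers plus the GWS and OA allocation registers,
 * one row per hardware VMID (0-15).
 */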
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

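/*
 * The "golden" tables below are flat arrays of {register, mask, value}
 * triples: amdgpu_program_register_sequence() reads each register, clears
 * the bits covered by the mask and ORs in the new value.  They are applied
 * per ASIC in gfx_v8_0_init_golden_registers() below.
 */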
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

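/**
 * gfx_v8_0_init_golden_registers - program ASIC-specific golden settings
 * @adev: amdgpu device pointer
 *
 * Applies the per-ASIC clockgating init, tuning and common register
 * sequences defined above.  Polaris10 additionally programs CG_ACLK_CNTL
 * and, for a few specific board SKUs, issues board-level I2C writes.
 */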
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

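/**
 * gfx_v8_0_scratch_init - set up the CP scratch register pool
 * @adev: amdgpu device pointer
 *
 * Exposes eight SCRATCH_REG* registers for ring and IB tests; the free
 * mask starts out with all of them available.
 */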
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

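/**
 * gfx_v8_0_ring_test_ring - basic ring buffer sanity check
 * @ring: ring to test
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a three-dword
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to the same register,
 * then polls until the value lands or the timeout expires.
 * Returns 0 on success, negative error code on failure.
 */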
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

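/**
 * gfx_v8_0_ring_test_ib - sanity check for indirect buffer submission
 * @ring: ring to test
 * @timeout: fence wait timeout in jiffies
 *
 * Same scratch register handshake as the ring test, but the
 * SET_UCONFIG_REG write is carried in an IB and completion is observed
 * through the returned fence.
 * Returns 0 on success, negative error code on failure.
 */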
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

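/**
 * gfx_v8_0_free_microcode - release the GFX firmware images
 * @adev: amdgpu device pointer
 *
 * Drops the PFP/ME/CE/RLC/MEC(2) firmware references and frees the RLC
 * register list buffer allocated by gfx_v8_0_init_microcode().
 */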
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

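/**
 * gfx_v8_0_init_microcode - fetch and parse the GFX firmware images
 * @adev: amdgpu device pointer
 *
 * Requests the chip-specific PFP, ME, CE, RLC and MEC(2) binaries,
 * records their version/feature levels, copies the RLC register lists
 * out of the RLC header and, for SMU-loaded firmware, fills in the
 * ucode table consumed by the SMU loader.
 * Returns 0 on success, negative error code on failure.
 */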
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released in feature version 46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else {
		adev->virt.chained_ib_support = false;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (le32_to_cpu(rlc_hdr->reg_list_format_size_bytes) >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (le32_to_cpu(rlc_hdr->reg_list_size_bytes) >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the MEC jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

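/**
 * gfx_v8_0_get_csb_buffer - build the RLC clear state buffer
 * @adev: amdgpu device pointer
 * @buffer: CPU pointer to the clear state BO
 *
 * Emits the PM4 preamble, the SECT_CONTEXT register extents from
 * adev->gfx.rlc.cs_data, the raster configuration and a trailing
 * CLEAR_STATE packet.
 */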
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

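/**
 * cz_init_cp_jump_table - copy the CP jump tables into the RLC table BO
 * @adev: amdgpu device pointer
 *
 * Concatenates the jump tables of the CE, PFP, ME, MEC and (on Carrizo)
 * MEC2 firmware images into the cp_table BO consumed by the RLC.
 */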
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

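/**
 * gfx_v8_0_rlc_init - allocate the RLC backing buffers
 * @adev: amdgpu device pointer
 *
 * Creates the clear state BO (filled via gfx_v8_0_get_csb_buffer()) and,
 * on Carrizo/Stoney, the CP jump table + GDS backup BO.
 * Returns 0 on success, negative error code on failure.
 */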
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

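/**
 * gfx_v8_0_mec_init - allocate the MEC HPD EOP buffer
 * @adev: amdgpu device pointer
 *
 * Claims the compute queues this driver will own and allocates one
 * GFX8_MEC_HPD_SIZE slot of EOP storage per compute ring in GTT.
 * Returns 0 on success, negative error code on failure.
 */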
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

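/* Raw GFX8 compute shader machine code for the EDC GPR workaround below:
 * the first blob writes a block of VGPRs, the second a block of SGPRs,
 * and both end with a barrier followed by s_endpgm.
 */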
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

1398 static const u32 vgpr_init_regs[] =
1399 {
1400 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1401 	mmCOMPUTE_RESOURCE_LIMITS, 0,
1402 	mmCOMPUTE_NUM_THREAD_X, 256*4,
1403 	mmCOMPUTE_NUM_THREAD_Y, 1,
1404 	mmCOMPUTE_NUM_THREAD_Z, 1,
1405 	mmCOMPUTE_PGM_RSRC2, 20,
1406 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1407 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1408 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1409 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1410 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1411 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1412 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1413 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1414 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1415 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1416 };
1417 
1418 static const u32 sgpr1_init_regs[] =
1419 {
1420 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1421 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1422 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1423 	mmCOMPUTE_NUM_THREAD_Y, 1,
1424 	mmCOMPUTE_NUM_THREAD_Z, 1,
1425 	mmCOMPUTE_PGM_RSRC2, 20,
1426 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1427 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1428 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1429 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1430 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1431 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1432 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1433 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1434 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1435 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1436 };
1437 
1438 static const u32 sgpr2_init_regs[] =
1439 {
1440 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1441 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1442 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1443 	mmCOMPUTE_NUM_THREAD_Y, 1,
1444 	mmCOMPUTE_NUM_THREAD_Z, 1,
1445 	mmCOMPUTE_PGM_RSRC2, 20,
1446 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1447 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1448 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1449 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1450 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1451 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1452 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1453 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1454 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1455 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1456 };
1457 
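/*
 * EDC single-error-correct/double-error-detect counter registers; the
 * workaround reads each of them once at the end, which clears the counts.
 */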
1458 static const u32 sec_ded_counter_registers[] =
1459 {
1460 	mmCPC_EDC_ATC_CNT,
1461 	mmCPC_EDC_SCRATCH_CNT,
1462 	mmCPC_EDC_UCODE_CNT,
1463 	mmCPF_EDC_ATC_CNT,
1464 	mmCPF_EDC_ROQ_CNT,
1465 	mmCPF_EDC_TAG_CNT,
1466 	mmCPG_EDC_ATC_CNT,
1467 	mmCPG_EDC_DMA_CNT,
1468 	mmCPG_EDC_TAG_CNT,
1469 	mmDC_EDC_CSINVOC_CNT,
1470 	mmDC_EDC_RESTORE_CNT,
1471 	mmDC_EDC_STATE_CNT,
1472 	mmGDS_EDC_CNT,
1473 	mmGDS_EDC_GRBM_CNT,
1474 	mmGDS_EDC_OA_DED,
1475 	mmSPI_EDC_CNT,
1476 	mmSQC_ATC_EDC_GATCL1_CNT,
1477 	mmSQC_EDC_CNT,
1478 	mmSQ_EDC_DED_CNT,
1479 	mmSQ_EDC_INFO,
1480 	mmSQ_EDC_SEC_CNT,
1481 	mmTCC_EDC_CNT,
1482 	mmTCP_ATC_EDC_GATCL1_CNT,
1483 	mmTCP_EDC_CNT,
1484 	mmTD_EDC_CNT
1485 };
1486 
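/*
 * Carrizo-only workaround: run three compute dispatches that write known
 * values into the whole VGPR file and the SGPR file (two passes with
 * different CU masks), presumably so the GPR EDC logic starts from a clean
 * state, then enable DED/PROP_FED reporting and clear the SEC/DED counters
 * by reading them back.
 */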
1487 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1488 {
1489 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1490 	struct amdgpu_ib ib;
1491 	struct dma_fence *f = NULL;
1492 	int r, i;
1493 	u32 tmp;
1494 	unsigned total_size, vgpr_offset, sgpr_offset;
1495 	u64 gpu_addr;
1496 
1497 	/* only supported on CZ */
1498 	if (adev->asic_type != CHIP_CARRIZO)
1499 		return 0;
1500 
1501 	/* bail if the compute ring is not ready */
1502 	if (!ring->ready)
1503 		return 0;
1504 
1505 	tmp = RREG32(mmGB_EDC_MODE);
1506 	WREG32(mmGB_EDC_MODE, 0);
1507 
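	/*
	 * IB sizing, per pass: each (reg, value) pair becomes a 3-dword
	 * SET_SH_REG write, plus 4 dwords for the PGM_LO/HI write, 5 for
	 * DISPATCH_DIRECT and 2 for the EVENT_WRITE, times 4 bytes/dword.
	 */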
1508 	total_size =
1509 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1510 	total_size +=
1511 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1512 	total_size +=
1513 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1514 	total_size = ALIGN(total_size, 256);
1515 	vgpr_offset = total_size;
1516 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1517 	sgpr_offset = total_size;
1518 	total_size += sizeof(sgpr_init_compute_shader);
1519 
1520 	/* allocate an indirect buffer to put the commands in */
1521 	memset(&ib, 0, sizeof(ib));
1522 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1523 	if (r) {
1524 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1525 		return r;
1526 	}
1527 
1528 	/* load the compute shaders */
1529 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1530 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1531 
1532 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1533 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1534 
1535 	/* init the ib length to 0 */
1536 	ib.length_dw = 0;
1537 
1538 	/* VGPR */
1539 	/* write the register state for the compute dispatch */
1540 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1541 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1542 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1543 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1544 	}
1545 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1546 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1547 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1548 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1549 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1550 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1551 
1552 	/* write dispatch packet */
1553 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1554 	ib.ptr[ib.length_dw++] = 8; /* x */
1555 	ib.ptr[ib.length_dw++] = 1; /* y */
1556 	ib.ptr[ib.length_dw++] = 1; /* z */
1557 	ib.ptr[ib.length_dw++] =
1558 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1559 
1560 	/* write CS partial flush packet */
1561 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1562 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1563 
1564 	/* SGPR1 */
1565 	/* write the register state for the compute dispatch */
1566 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1567 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1568 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1569 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1570 	}
1571 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1572 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1573 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1574 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1575 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1576 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1577 
1578 	/* write dispatch packet */
1579 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1580 	ib.ptr[ib.length_dw++] = 8; /* x */
1581 	ib.ptr[ib.length_dw++] = 1; /* y */
1582 	ib.ptr[ib.length_dw++] = 1; /* z */
1583 	ib.ptr[ib.length_dw++] =
1584 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1585 
1586 	/* write CS partial flush packet */
1587 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1588 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1589 
1590 	/* SGPR2 */
1591 	/* write the register state for the compute dispatch */
1592 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1593 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1594 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1595 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1596 	}
1597 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1598 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1599 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1600 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1601 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1602 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1603 
1604 	/* write dispatch packet */
1605 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1606 	ib.ptr[ib.length_dw++] = 8; /* x */
1607 	ib.ptr[ib.length_dw++] = 1; /* y */
1608 	ib.ptr[ib.length_dw++] = 1; /* z */
1609 	ib.ptr[ib.length_dw++] =
1610 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1611 
1612 	/* write CS partial flush packet */
1613 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1614 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1615 
1616 	/* schedule the ib on the ring */
1617 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1618 	if (r) {
1619 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1620 		goto fail;
1621 	}
1622 
1623 	/* wait for the GPU to finish processing the IB */
1624 	r = dma_fence_wait(f, false);
1625 	if (r) {
1626 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1627 		goto fail;
1628 	}
1629 
1630 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1631 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1632 	WREG32(mmGB_EDC_MODE, tmp);
1633 
1634 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1635 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1636 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1637 
1639 	/* read back registers to clear the counters */
1640 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1641 		RREG32(sec_ded_counter_registers[i]);
1642 
1643 fail:
1644 	amdgpu_ib_free(adev, &ib, NULL);
1645 	dma_fence_put(f);
1646 
1647 	return r;
1648 }
1649 
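/*
 * Fill adev->gfx.config with per-ASIC limits (from atombios on Polaris,
 * fixed tables elsewhere) and derive gb_addr_config, fixing up ROW_SIZE
 * from the detected DRAM row size below.
 */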
1650 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1651 {
1652 	u32 gb_addr_config;
1653 	u32 mc_shared_chmap, mc_arb_ramcfg;
1654 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1655 	u32 tmp;
1656 	int ret;
1657 
1658 	switch (adev->asic_type) {
1659 	case CHIP_TOPAZ:
1660 		adev->gfx.config.max_shader_engines = 1;
1661 		adev->gfx.config.max_tile_pipes = 2;
1662 		adev->gfx.config.max_cu_per_sh = 6;
1663 		adev->gfx.config.max_sh_per_se = 1;
1664 		adev->gfx.config.max_backends_per_se = 2;
1665 		adev->gfx.config.max_texture_channel_caches = 2;
1666 		adev->gfx.config.max_gprs = 256;
1667 		adev->gfx.config.max_gs_threads = 32;
1668 		adev->gfx.config.max_hw_contexts = 8;
1669 
1670 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1671 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1672 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1673 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1674 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1675 		break;
1676 	case CHIP_FIJI:
1677 		adev->gfx.config.max_shader_engines = 4;
1678 		adev->gfx.config.max_tile_pipes = 16;
1679 		adev->gfx.config.max_cu_per_sh = 16;
1680 		adev->gfx.config.max_sh_per_se = 1;
1681 		adev->gfx.config.max_backends_per_se = 4;
1682 		adev->gfx.config.max_texture_channel_caches = 16;
1683 		adev->gfx.config.max_gprs = 256;
1684 		adev->gfx.config.max_gs_threads = 32;
1685 		adev->gfx.config.max_hw_contexts = 8;
1686 
1687 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1688 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1689 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1690 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1691 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1692 		break;
1693 	case CHIP_POLARIS11:
1694 	case CHIP_POLARIS12:
1695 		ret = amdgpu_atombios_get_gfx_info(adev);
1696 		if (ret)
1697 			return ret;
1698 		adev->gfx.config.max_gprs = 256;
1699 		adev->gfx.config.max_gs_threads = 32;
1700 		adev->gfx.config.max_hw_contexts = 8;
1701 
1702 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1703 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1704 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1705 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1706 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1707 		break;
1708 	case CHIP_POLARIS10:
1709 		ret = amdgpu_atombios_get_gfx_info(adev);
1710 		if (ret)
1711 			return ret;
1712 		adev->gfx.config.max_gprs = 256;
1713 		adev->gfx.config.max_gs_threads = 32;
1714 		adev->gfx.config.max_hw_contexts = 8;
1715 
1716 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1720 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1721 		break;
1722 	case CHIP_TONGA:
1723 		adev->gfx.config.max_shader_engines = 4;
1724 		adev->gfx.config.max_tile_pipes = 8;
1725 		adev->gfx.config.max_cu_per_sh = 8;
1726 		adev->gfx.config.max_sh_per_se = 1;
1727 		adev->gfx.config.max_backends_per_se = 2;
1728 		adev->gfx.config.max_texture_channel_caches = 8;
1729 		adev->gfx.config.max_gprs = 256;
1730 		adev->gfx.config.max_gs_threads = 32;
1731 		adev->gfx.config.max_hw_contexts = 8;
1732 
1733 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1734 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1735 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1736 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1737 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1738 		break;
1739 	case CHIP_CARRIZO:
1740 		adev->gfx.config.max_shader_engines = 1;
1741 		adev->gfx.config.max_tile_pipes = 2;
1742 		adev->gfx.config.max_sh_per_se = 1;
1743 		adev->gfx.config.max_backends_per_se = 2;
1744 		adev->gfx.config.max_cu_per_sh = 8;
1745 		adev->gfx.config.max_texture_channel_caches = 2;
1746 		adev->gfx.config.max_gprs = 256;
1747 		adev->gfx.config.max_gs_threads = 32;
1748 		adev->gfx.config.max_hw_contexts = 8;
1749 
1750 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1751 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1752 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1753 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1754 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1755 		break;
1756 	case CHIP_STONEY:
1757 		adev->gfx.config.max_shader_engines = 1;
1758 		adev->gfx.config.max_tile_pipes = 2;
1759 		adev->gfx.config.max_sh_per_se = 1;
1760 		adev->gfx.config.max_backends_per_se = 1;
1761 		adev->gfx.config.max_cu_per_sh = 3;
1762 		adev->gfx.config.max_texture_channel_caches = 2;
1763 		adev->gfx.config.max_gprs = 256;
1764 		adev->gfx.config.max_gs_threads = 16;
1765 		adev->gfx.config.max_hw_contexts = 8;
1766 
1767 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1768 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1769 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1770 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1771 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1772 		break;
1773 	default:
1774 		adev->gfx.config.max_shader_engines = 2;
1775 		adev->gfx.config.max_tile_pipes = 4;
1776 		adev->gfx.config.max_cu_per_sh = 2;
1777 		adev->gfx.config.max_sh_per_se = 1;
1778 		adev->gfx.config.max_backends_per_se = 2;
1779 		adev->gfx.config.max_texture_channel_caches = 4;
1780 		adev->gfx.config.max_gprs = 256;
1781 		adev->gfx.config.max_gs_threads = 32;
1782 		adev->gfx.config.max_hw_contexts = 8;
1783 
1784 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1785 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1786 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1787 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1788 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1789 		break;
1790 	}
1791 
1792 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1793 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1794 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1795 
1796 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1797 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1798 	if (adev->flags & AMD_IS_APU) {
1799 		/* Get memory bank mapping mode. */
1800 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1801 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1802 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1803 
1804 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1805 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1806 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1807 
1808 		/* Validate settings in case only one DIMM is installed. */
1809 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1810 			dimm00_addr_map = 0;
1811 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1812 			dimm01_addr_map = 0;
1813 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1814 			dimm10_addr_map = 0;
1815 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1816 			dimm11_addr_map = 0;
1817 
1818 		/* If the DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1819 		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1820 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1821 			adev->gfx.config.mem_row_size_in_kb = 2;
1822 		else
1823 			adev->gfx.config.mem_row_size_in_kb = 1;
1824 	} else {
1825 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
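		/* e.g. NOOFCOLS == 2: (4 * (1 << 10)) / 1024 = 4, i.e. a 4KB row */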
1826 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1827 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1828 			adev->gfx.config.mem_row_size_in_kb = 4;
1829 	}
1830 
1831 	adev->gfx.config.shader_engine_tile_size = 32;
1832 	adev->gfx.config.num_gpus = 1;
1833 	adev->gfx.config.multi_gpu_tile_size = 64;
1834 
1835 	/* fix up row size */
1836 	switch (adev->gfx.config.mem_row_size_in_kb) {
1837 	case 1:
1838 	default:
1839 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1840 		break;
1841 	case 2:
1842 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1843 		break;
1844 	case 4:
1845 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1846 		break;
1847 	}
1848 	adev->gfx.config.gb_addr_config = gb_addr_config;
1849 
1850 	return 0;
1851 }
1852 
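/*
 * One-time setup of a single compute ring: the doorbell index and the
 * HPD/EOP slot are both derived from ring_id, and the EOP interrupt
 * source is selected from (me, pipe).
 */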
1853 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1854 					int mec, int pipe, int queue)
1855 {
1856 	int r;
1857 	unsigned irq_type;
1858 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1861 
1862 	/* mec0 is me1 */
1863 	ring->me = mec + 1;
1864 	ring->pipe = pipe;
1865 	ring->queue = queue;
1866 
1867 	ring->ring_obj = NULL;
1868 	ring->use_doorbell = true;
1869 	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1870 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1871 				+ (ring_id * GFX8_MEC_HPD_SIZE);
1872 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1873 
1874 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1875 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1876 		+ ring->pipe;
1877 
1878 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1879 	r = amdgpu_ring_init(adev, ring, 1024,
1880 			&adev->gfx.eop_irq, irq_type);
1881 	if (r)
1882 		return r;
1883 
1885 	return 0;
1886 }
1887 
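/*
 * sw_init: hook up the legacy IH sources (178 KIQ, 181 EOP, 184/185
 * privileged reg/instruction faults), load microcode, allocate the RLC
 * and MEC BOs, create the gfx/compute/KIQ rings and reserve GDS, GWS
 * and OA, then run the early gfx config.
 */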
1888 static int gfx_v8_0_sw_init(void *handle)
1889 {
1890 	int i, j, k, r, ring_id;
1891 	struct amdgpu_ring *ring;
1892 	struct amdgpu_kiq *kiq;
1893 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1894 
1895 	switch (adev->asic_type) {
1896 	case CHIP_FIJI:
1897 	case CHIP_TONGA:
1898 	case CHIP_POLARIS11:
1899 	case CHIP_POLARIS12:
1900 	case CHIP_POLARIS10:
1901 	case CHIP_CARRIZO:
1902 		adev->gfx.mec.num_mec = 2;
1903 		break;
1904 	case CHIP_TOPAZ:
1905 	case CHIP_STONEY:
1906 	default:
1907 		adev->gfx.mec.num_mec = 1;
1908 		break;
1909 	}
1910 
1911 	adev->gfx.mec.num_pipe_per_mec = 4;
1912 	adev->gfx.mec.num_queue_per_pipe = 8;
1913 
1914 	/* KIQ event */
1915 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
1916 	if (r)
1917 		return r;
1918 
1919 	/* EOP Event */
1920 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
1921 	if (r)
1922 		return r;
1923 
1924 	/* Privileged reg */
1925 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
1926 			      &adev->gfx.priv_reg_irq);
1927 	if (r)
1928 		return r;
1929 
1930 	/* Privileged inst */
1931 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
1932 			      &adev->gfx.priv_inst_irq);
1933 	if (r)
1934 		return r;
1935 
1936 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1937 
1938 	gfx_v8_0_scratch_init(adev);
1939 
1940 	r = gfx_v8_0_init_microcode(adev);
1941 	if (r) {
1942 		DRM_ERROR("Failed to load gfx firmware!\n");
1943 		return r;
1944 	}
1945 
1946 	r = gfx_v8_0_rlc_init(adev);
1947 	if (r) {
1948 		DRM_ERROR("Failed to init rlc BOs!\n");
1949 		return r;
1950 	}
1951 
1952 	r = gfx_v8_0_mec_init(adev);
1953 	if (r) {
1954 		DRM_ERROR("Failed to init MEC BOs!\n");
1955 		return r;
1956 	}
1957 
1958 	/* set up the gfx ring */
1959 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1960 		ring = &adev->gfx.gfx_ring[i];
1961 		ring->ring_obj = NULL;
1962 		sprintf(ring->name, "gfx");
1963 		/* no gfx doorbells on iceland */
1964 		if (adev->asic_type != CHIP_TOPAZ) {
1965 			ring->use_doorbell = true;
1966 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1967 		}
1968 
1969 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
1970 				     AMDGPU_CP_IRQ_GFX_EOP);
1971 		if (r)
1972 			return r;
1973 	}
1974 
1976 	/* set up the compute queues - allocate horizontally across pipes */
1977 	ring_id = 0;
1978 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1979 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1980 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1981 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1982 					continue;
1983 
1984 				r = gfx_v8_0_compute_ring_init(adev,
1985 								ring_id,
1986 								i, k, j);
1987 				if (r)
1988 					return r;
1989 
1990 				ring_id++;
1991 			}
1992 		}
1993 	}
1994 
1995 	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
1996 	if (r) {
1997 		DRM_ERROR("Failed to init KIQ BOs!\n");
1998 		return r;
1999 	}
2000 
2001 	kiq = &adev->gfx.kiq;
2002 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2003 	if (r)
2004 		return r;
2005 
2006 	/* create MQDs for all compute queues, as well as the KIQ for the SRIOV case */
2007 	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2008 	if (r)
2009 		return r;
2010 
2011 	/* reserve GDS, GWS and OA resources for gfx */
2012 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2013 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2014 				    &adev->gds.gds_gfx_bo, NULL, NULL);
2015 	if (r)
2016 		return r;
2017 
2018 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2019 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2020 				    &adev->gds.gws_gfx_bo, NULL, NULL);
2021 	if (r)
2022 		return r;
2023 
2024 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2025 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2026 				    &adev->gds.oa_gfx_bo, NULL, NULL);
2027 	if (r)
2028 		return r;
2029 
2030 	adev->gfx.ce_ram_size = 0x8000;
2031 
2032 	r = gfx_v8_0_gpu_early_init(adev);
2033 	if (r)
2034 		return r;
2035 
2036 	return 0;
2037 }
2038 
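/* Tear down everything sw_init created, in roughly reverse order. */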
2039 static int gfx_v8_0_sw_fini(void *handle)
2040 {
2041 	int i;
2042 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2043 
2044 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2045 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2046 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2047 
2048 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2049 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2050 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2051 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2052 
2053 	amdgpu_gfx_compute_mqd_sw_fini(adev);
2054 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2055 	amdgpu_gfx_kiq_fini(adev);
2056 
2057 	gfx_v8_0_mec_fini(adev);
2058 	gfx_v8_0_rlc_fini(adev);
2059 	gfx_v8_0_free_microcode(adev);
2060 
2061 	return 0;
2062 }
2063 
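/*
 * Program the 32 GB_TILE_MODE and 16 GB_MACROTILE_MODE registers with the
 * per-ASIC golden tiling tables, keeping a CPU-side copy in
 * adev->gfx.config; entries that are skipped stay at their reset values.
 */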
2064 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2065 {
2066 	uint32_t *modearray, *mod2array;
2067 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2068 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2069 	u32 reg_offset;
2070 
2071 	modearray = adev->gfx.config.tile_mode_array;
2072 	mod2array = adev->gfx.config.macrotile_mode_array;
2073 
2074 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2075 		modearray[reg_offset] = 0;
2076 
2077 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2078 		mod2array[reg_offset] = 0;
2079 
2080 	switch (adev->asic_type) {
2081 	case CHIP_TOPAZ:
2082 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2083 				PIPE_CONFIG(ADDR_SURF_P2) |
2084 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2085 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2086 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087 				PIPE_CONFIG(ADDR_SURF_P2) |
2088 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2089 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2090 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091 				PIPE_CONFIG(ADDR_SURF_P2) |
2092 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2093 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2094 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2095 				PIPE_CONFIG(ADDR_SURF_P2) |
2096 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2097 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2098 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099 				PIPE_CONFIG(ADDR_SURF_P2) |
2100 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2101 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2102 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2103 				PIPE_CONFIG(ADDR_SURF_P2) |
2104 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2105 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2106 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2107 				PIPE_CONFIG(ADDR_SURF_P2) |
2108 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2109 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2110 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2111 				PIPE_CONFIG(ADDR_SURF_P2));
2112 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2113 				PIPE_CONFIG(ADDR_SURF_P2) |
2114 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2115 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2116 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117 				 PIPE_CONFIG(ADDR_SURF_P2) |
2118 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2119 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2120 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2121 				 PIPE_CONFIG(ADDR_SURF_P2) |
2122 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2123 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2124 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2125 				 PIPE_CONFIG(ADDR_SURF_P2) |
2126 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2127 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2128 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129 				 PIPE_CONFIG(ADDR_SURF_P2) |
2130 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2132 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2133 				 PIPE_CONFIG(ADDR_SURF_P2) |
2134 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2135 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2136 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2137 				 PIPE_CONFIG(ADDR_SURF_P2) |
2138 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2139 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2140 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2141 				 PIPE_CONFIG(ADDR_SURF_P2) |
2142 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2143 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2144 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2145 				 PIPE_CONFIG(ADDR_SURF_P2) |
2146 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2147 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2148 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2149 				 PIPE_CONFIG(ADDR_SURF_P2) |
2150 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2151 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2152 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2153 				 PIPE_CONFIG(ADDR_SURF_P2) |
2154 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2155 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2156 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2157 				 PIPE_CONFIG(ADDR_SURF_P2) |
2158 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2159 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2160 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2161 				 PIPE_CONFIG(ADDR_SURF_P2) |
2162 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2164 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2165 				 PIPE_CONFIG(ADDR_SURF_P2) |
2166 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2167 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2168 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2169 				 PIPE_CONFIG(ADDR_SURF_P2) |
2170 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2171 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2172 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173 				 PIPE_CONFIG(ADDR_SURF_P2) |
2174 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2175 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2176 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2177 				 PIPE_CONFIG(ADDR_SURF_P2) |
2178 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2179 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2181 				 PIPE_CONFIG(ADDR_SURF_P2) |
2182 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2183 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2184 
2185 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2186 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2187 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2188 				NUM_BANKS(ADDR_SURF_8_BANK));
2189 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2190 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2191 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2192 				NUM_BANKS(ADDR_SURF_8_BANK));
2193 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2194 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2195 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196 				NUM_BANKS(ADDR_SURF_8_BANK));
2197 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2199 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2200 				NUM_BANKS(ADDR_SURF_8_BANK));
2201 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2203 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2204 				NUM_BANKS(ADDR_SURF_8_BANK));
2205 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2208 				NUM_BANKS(ADDR_SURF_8_BANK));
2209 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2210 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2211 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2212 				NUM_BANKS(ADDR_SURF_8_BANK));
2213 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2214 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2215 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2216 				NUM_BANKS(ADDR_SURF_16_BANK));
2217 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2218 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2219 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2220 				NUM_BANKS(ADDR_SURF_16_BANK));
2221 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2222 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2224 				 NUM_BANKS(ADDR_SURF_16_BANK));
2225 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2226 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2227 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2228 				 NUM_BANKS(ADDR_SURF_16_BANK));
2229 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2231 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2232 				 NUM_BANKS(ADDR_SURF_16_BANK));
2233 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2235 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236 				 NUM_BANKS(ADDR_SURF_16_BANK));
2237 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240 				 NUM_BANKS(ADDR_SURF_8_BANK));
2241 
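		/*
		 * Indices 7, 12, 17 and 23 hold the ADDR_SURF_P4_16x16 PRT
		 * variants on the larger ASICs (see the Fiji/Tonga tables
		 * below); Topaz has no equivalent, so leave them at reset.
		 */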
2242 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2243 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2244 			    reg_offset != 23)
2245 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2246 
2247 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2248 			if (reg_offset != 7)
2249 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2250 
2251 		break;
2252 	case CHIP_FIJI:
2253 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2256 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2259 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2260 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2264 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2265 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2266 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2268 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2271 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2272 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2273 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2274 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2275 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2276 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2277 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2279 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2280 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2281 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2282 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2283 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2284 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2285 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2286 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2287 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2288 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2289 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2290 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2291 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2294 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2296 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2298 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2299 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2301 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2302 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2303 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2304 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2307 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2310 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2311 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2312 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2314 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2318 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2319 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2321 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2322 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2323 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2324 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2326 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2327 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2328 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2330 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2331 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2332 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2334 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2335 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2336 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2338 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2339 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2340 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2342 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2343 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2344 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2346 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2347 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2348 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2351 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2352 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2354 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2355 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2356 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2358 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2359 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2360 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2362 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2364 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2366 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2370 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2372 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2373 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2374 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2375 
2376 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2378 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2379 				NUM_BANKS(ADDR_SURF_8_BANK));
2380 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2382 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2383 				NUM_BANKS(ADDR_SURF_8_BANK));
2384 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2387 				NUM_BANKS(ADDR_SURF_8_BANK));
2388 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2390 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2391 				NUM_BANKS(ADDR_SURF_8_BANK));
2392 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2394 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2395 				NUM_BANKS(ADDR_SURF_8_BANK));
2396 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2398 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2399 				NUM_BANKS(ADDR_SURF_8_BANK));
2400 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2402 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2403 				NUM_BANKS(ADDR_SURF_8_BANK));
2404 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2406 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2407 				NUM_BANKS(ADDR_SURF_8_BANK));
2408 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2410 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2411 				NUM_BANKS(ADDR_SURF_8_BANK));
2412 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2414 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2415 				 NUM_BANKS(ADDR_SURF_8_BANK));
2416 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419 				 NUM_BANKS(ADDR_SURF_8_BANK));
2420 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2422 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2423 				 NUM_BANKS(ADDR_SURF_8_BANK));
2424 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2427 				 NUM_BANKS(ADDR_SURF_8_BANK));
2428 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2431 				 NUM_BANKS(ADDR_SURF_4_BANK));
2432 
2433 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2434 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2435 
2436 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2437 			if (reg_offset != 7)
2438 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2439 
2440 		break;
2441 	case CHIP_TONGA:
2442 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2444 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2445 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2448 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2449 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2452 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2453 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2456 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2457 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2460 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2461 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2463 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2464 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2465 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2466 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2468 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2469 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2471 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2472 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2473 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2474 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2475 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2476 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2477 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2478 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2479 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2483 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2485 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2487 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2488 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2489 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2490 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2491 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2492 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2493 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2495 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2498 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2500 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2501 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2504 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2505 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2507 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2508 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2509 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2510 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2511 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2512 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2513 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2515 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2517 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2521 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2523 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2524 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2525 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2527 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2528 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2529 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2531 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2532 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2533 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2535 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2536 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2537 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2539 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2540 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2541 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2543 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2544 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2545 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2547 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2548 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2553 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2555 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2557 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2559 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2560 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2561 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2563 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2564 
2565 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2567 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2568 				NUM_BANKS(ADDR_SURF_16_BANK));
2569 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2571 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2572 				NUM_BANKS(ADDR_SURF_16_BANK));
2573 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2575 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2576 				NUM_BANKS(ADDR_SURF_16_BANK));
2577 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2579 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2580 				NUM_BANKS(ADDR_SURF_16_BANK));
2581 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2583 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584 				NUM_BANKS(ADDR_SURF_16_BANK));
2585 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2587 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2588 				NUM_BANKS(ADDR_SURF_16_BANK));
2589 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2591 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592 				NUM_BANKS(ADDR_SURF_16_BANK));
2593 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2595 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2596 				NUM_BANKS(ADDR_SURF_16_BANK));
2597 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2599 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2600 				NUM_BANKS(ADDR_SURF_16_BANK));
2601 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2603 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2604 				 NUM_BANKS(ADDR_SURF_16_BANK));
2605 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2607 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2608 				 NUM_BANKS(ADDR_SURF_16_BANK));
2609 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2611 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2612 				 NUM_BANKS(ADDR_SURF_8_BANK));
2613 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2615 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2616 				 NUM_BANKS(ADDR_SURF_4_BANK));
2617 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2619 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2620 				 NUM_BANKS(ADDR_SURF_4_BANK));
2621 
2622 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2623 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2624 
2625 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2626 			if (reg_offset != 7)
2627 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2628 
2629 		break;
2630 	case CHIP_POLARIS11:
2631 	case CHIP_POLARIS12:
2632 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2635 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2636 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2637 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2639 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2640 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2643 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2644 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2647 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2648 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2650 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2651 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2652 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2653 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2654 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2655 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2656 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2657 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2658 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2659 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2660 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2661 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2662 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2663 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2664 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2665 				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2666 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2667 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2669 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2670 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2673 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2674 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2675 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2677 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2678 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2679 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2681 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2682 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2683 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2685 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2689 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2690 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2691 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2693 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2694 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2695 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2698 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2699 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2702 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2703 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2706 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2707 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2709 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2710 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2711 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2713 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2714 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2715 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2717 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2718 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2719 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2721 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2722 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2723 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2725 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2726 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2727 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2730 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2731 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2733 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2734 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2735 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2737 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2738 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2739 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2741 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2743 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2745 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2747 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2749 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2750 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2751 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2753 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2754 
2755 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2757 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2758 				NUM_BANKS(ADDR_SURF_16_BANK));
2759 
2760 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2762 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2763 				NUM_BANKS(ADDR_SURF_16_BANK));
2764 
2765 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2766 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2767 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2768 				NUM_BANKS(ADDR_SURF_16_BANK));
2769 
2770 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2773 				NUM_BANKS(ADDR_SURF_16_BANK));
2774 
2775 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2777 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2778 				NUM_BANKS(ADDR_SURF_16_BANK));
2779 
2780 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 				NUM_BANKS(ADDR_SURF_16_BANK));
2784 
2785 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2787 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2788 				NUM_BANKS(ADDR_SURF_16_BANK));
2789 
2790 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2791 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2792 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2793 				NUM_BANKS(ADDR_SURF_16_BANK));
2794 
2795 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2796 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2797 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2798 				NUM_BANKS(ADDR_SURF_16_BANK));
2799 
2800 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2802 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2803 				NUM_BANKS(ADDR_SURF_16_BANK));
2804 
2805 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2806 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2807 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2808 				NUM_BANKS(ADDR_SURF_16_BANK));
2809 
2810 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2812 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2813 				NUM_BANKS(ADDR_SURF_16_BANK));
2814 
2815 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2817 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2818 				NUM_BANKS(ADDR_SURF_8_BANK));
2819 
2820 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2823 				NUM_BANKS(ADDR_SURF_4_BANK));
2824 
2825 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2826 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2827 
2828 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2829 			if (reg_offset != 7)
2830 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2831 
2832 		break;
2833 	case CHIP_POLARIS10:
2834 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2837 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2838 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2841 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2842 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2844 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2845 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2846 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2847 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2848 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2849 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2850 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2851 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2852 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2853 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2855 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2856 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2857 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2858 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2861 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2862 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2863 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2864 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2865 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2866 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2867 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2868 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2869 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2870 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2871 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2872 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2874 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2875 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2876 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2877 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2878 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2880 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2881 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2883 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2884 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2885 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2886 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2887 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2888 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2890 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2891 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2893 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2895 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2899 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2900 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2903 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2904 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2905 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2907 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2908 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2909 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2911 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2912 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2913 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2915 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2916 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2917 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2919 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2920 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2921 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2923 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2924 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2925 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2927 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2928 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2929 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2932 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2933 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2935 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2936 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2937 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2939 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2940 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2943 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2947 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2949 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2951 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2952 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2953 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2954 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2955 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2956 
2957 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2958 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2959 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2960 				NUM_BANKS(ADDR_SURF_16_BANK));
2961 
2962 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2963 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965 				NUM_BANKS(ADDR_SURF_16_BANK));
2966 
2967 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2969 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970 				NUM_BANKS(ADDR_SURF_16_BANK));
2971 
2972 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2974 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 				NUM_BANKS(ADDR_SURF_16_BANK));
2976 
2977 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2978 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2979 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2980 				NUM_BANKS(ADDR_SURF_16_BANK));
2981 
2982 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2983 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2984 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2985 				NUM_BANKS(ADDR_SURF_16_BANK));
2986 
2987 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2990 				NUM_BANKS(ADDR_SURF_16_BANK));
2991 
2992 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2994 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 				NUM_BANKS(ADDR_SURF_16_BANK));
2996 
2997 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2999 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3000 				NUM_BANKS(ADDR_SURF_16_BANK));
3001 
3002 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3004 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3005 				NUM_BANKS(ADDR_SURF_16_BANK));
3006 
3007 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3010 				NUM_BANKS(ADDR_SURF_16_BANK));
3011 
3012 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3015 				NUM_BANKS(ADDR_SURF_8_BANK));
3016 
3017 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3018 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3019 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3020 				NUM_BANKS(ADDR_SURF_4_BANK));
3021 
3022 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3024 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3025 				NUM_BANKS(ADDR_SURF_4_BANK));
3026 
3027 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3028 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3029 
3030 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3031 			if (reg_offset != 7)
3032 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3033 
3034 		break;
3035 	case CHIP_STONEY:
3036 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3037 				PIPE_CONFIG(ADDR_SURF_P2) |
3038 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3039 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3040 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3041 				PIPE_CONFIG(ADDR_SURF_P2) |
3042 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3043 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3044 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3045 				PIPE_CONFIG(ADDR_SURF_P2) |
3046 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3047 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3048 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3049 				PIPE_CONFIG(ADDR_SURF_P2) |
3050 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3051 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3052 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3053 				PIPE_CONFIG(ADDR_SURF_P2) |
3054 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3055 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3056 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3057 				PIPE_CONFIG(ADDR_SURF_P2) |
3058 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3059 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3060 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3061 				PIPE_CONFIG(ADDR_SURF_P2) |
3062 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3063 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3064 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3065 				PIPE_CONFIG(ADDR_SURF_P2));
3066 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3067 				PIPE_CONFIG(ADDR_SURF_P2) |
3068 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3069 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3070 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3071 				 PIPE_CONFIG(ADDR_SURF_P2) |
3072 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3073 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3074 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3075 				 PIPE_CONFIG(ADDR_SURF_P2) |
3076 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3077 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3078 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3079 				 PIPE_CONFIG(ADDR_SURF_P2) |
3080 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3081 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3082 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3083 				 PIPE_CONFIG(ADDR_SURF_P2) |
3084 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3085 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3086 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3087 				 PIPE_CONFIG(ADDR_SURF_P2) |
3088 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3089 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3090 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3091 				 PIPE_CONFIG(ADDR_SURF_P2) |
3092 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3093 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3094 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3095 				 PIPE_CONFIG(ADDR_SURF_P2) |
3096 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3097 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3098 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3099 				 PIPE_CONFIG(ADDR_SURF_P2) |
3100 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3101 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3102 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3103 				 PIPE_CONFIG(ADDR_SURF_P2) |
3104 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3105 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3106 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3107 				 PIPE_CONFIG(ADDR_SURF_P2) |
3108 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3109 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3110 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3111 				 PIPE_CONFIG(ADDR_SURF_P2) |
3112 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3113 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3114 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3115 				 PIPE_CONFIG(ADDR_SURF_P2) |
3116 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3117 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3118 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3119 				 PIPE_CONFIG(ADDR_SURF_P2) |
3120 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3121 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3122 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3123 				 PIPE_CONFIG(ADDR_SURF_P2) |
3124 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3125 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3126 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3127 				 PIPE_CONFIG(ADDR_SURF_P2) |
3128 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3129 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3130 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3131 				 PIPE_CONFIG(ADDR_SURF_P2) |
3132 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3133 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3135 				 PIPE_CONFIG(ADDR_SURF_P2) |
3136 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3137 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3138 
3139 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3141 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3142 				NUM_BANKS(ADDR_SURF_8_BANK));
3143 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3145 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3146 				NUM_BANKS(ADDR_SURF_8_BANK));
3147 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 				NUM_BANKS(ADDR_SURF_8_BANK));
3151 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3152 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3153 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3154 				NUM_BANKS(ADDR_SURF_8_BANK));
3155 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3157 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3158 				NUM_BANKS(ADDR_SURF_8_BANK));
3159 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3162 				NUM_BANKS(ADDR_SURF_8_BANK));
3163 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3165 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3166 				NUM_BANKS(ADDR_SURF_8_BANK));
3167 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3168 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3169 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170 				NUM_BANKS(ADDR_SURF_16_BANK));
3171 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3172 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3173 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3174 				NUM_BANKS(ADDR_SURF_16_BANK));
3175 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3176 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3177 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3178 				 NUM_BANKS(ADDR_SURF_16_BANK));
3179 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3180 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3181 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3182 				 NUM_BANKS(ADDR_SURF_16_BANK));
3183 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3185 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3186 				 NUM_BANKS(ADDR_SURF_16_BANK));
3187 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3190 				 NUM_BANKS(ADDR_SURF_16_BANK));
3191 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3193 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3194 				 NUM_BANKS(ADDR_SURF_8_BANK));
3195 
3196 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3197 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3198 			    reg_offset != 23)
3199 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3200 
3201 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3202 			if (reg_offset != 7)
3203 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3204 
3205 		break;
3206 	default:
3207 		dev_warn(adev->dev,
3208 			 "Unknown chip type (%d) in gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3209 			 adev->asic_type);
3210 
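		/* fall through */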
3211 	case CHIP_CARRIZO:
3212 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3213 				PIPE_CONFIG(ADDR_SURF_P2) |
3214 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3215 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3216 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3217 				PIPE_CONFIG(ADDR_SURF_P2) |
3218 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3219 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3220 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3221 				PIPE_CONFIG(ADDR_SURF_P2) |
3222 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3223 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3224 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3225 				PIPE_CONFIG(ADDR_SURF_P2) |
3226 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3227 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3228 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3229 				PIPE_CONFIG(ADDR_SURF_P2) |
3230 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3231 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3232 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3233 				PIPE_CONFIG(ADDR_SURF_P2) |
3234 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3235 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3236 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3237 				PIPE_CONFIG(ADDR_SURF_P2) |
3238 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3239 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3241 				PIPE_CONFIG(ADDR_SURF_P2));
3242 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3243 				PIPE_CONFIG(ADDR_SURF_P2) |
3244 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3246 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3247 				 PIPE_CONFIG(ADDR_SURF_P2) |
3248 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3249 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3250 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3251 				 PIPE_CONFIG(ADDR_SURF_P2) |
3252 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3253 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3254 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3255 				 PIPE_CONFIG(ADDR_SURF_P2) |
3256 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3257 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259 				 PIPE_CONFIG(ADDR_SURF_P2) |
3260 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3261 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3263 				 PIPE_CONFIG(ADDR_SURF_P2) |
3264 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3265 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3266 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3267 				 PIPE_CONFIG(ADDR_SURF_P2) |
3268 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3269 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3270 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3271 				 PIPE_CONFIG(ADDR_SURF_P2) |
3272 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3273 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3274 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3275 				 PIPE_CONFIG(ADDR_SURF_P2) |
3276 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3277 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3278 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3279 				 PIPE_CONFIG(ADDR_SURF_P2) |
3280 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3281 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3282 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3283 				 PIPE_CONFIG(ADDR_SURF_P2) |
3284 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3285 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3286 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3287 				 PIPE_CONFIG(ADDR_SURF_P2) |
3288 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3289 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3290 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3291 				 PIPE_CONFIG(ADDR_SURF_P2) |
3292 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3293 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3294 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3295 				 PIPE_CONFIG(ADDR_SURF_P2) |
3296 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3297 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3299 				 PIPE_CONFIG(ADDR_SURF_P2) |
3300 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3301 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3303 				 PIPE_CONFIG(ADDR_SURF_P2) |
3304 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3305 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3306 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3307 				 PIPE_CONFIG(ADDR_SURF_P2) |
3308 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3309 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3311 				 PIPE_CONFIG(ADDR_SURF_P2) |
3312 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3313 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3314 
3315 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3317 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3318 				NUM_BANKS(ADDR_SURF_8_BANK));
3319 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3320 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3321 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3322 				NUM_BANKS(ADDR_SURF_8_BANK));
3323 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326 				NUM_BANKS(ADDR_SURF_8_BANK));
3327 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3329 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3330 				NUM_BANKS(ADDR_SURF_8_BANK));
3331 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3334 				NUM_BANKS(ADDR_SURF_8_BANK));
3335 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3337 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3338 				NUM_BANKS(ADDR_SURF_8_BANK));
3339 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3341 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3342 				NUM_BANKS(ADDR_SURF_8_BANK));
3343 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3344 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3345 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3346 				NUM_BANKS(ADDR_SURF_16_BANK));
3347 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3348 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3349 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3350 				NUM_BANKS(ADDR_SURF_16_BANK));
3351 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3352 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3353 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354 				 NUM_BANKS(ADDR_SURF_16_BANK));
3355 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3356 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3357 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3358 				 NUM_BANKS(ADDR_SURF_16_BANK));
3359 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3360 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3361 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362 				 NUM_BANKS(ADDR_SURF_16_BANK));
3363 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3364 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3365 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366 				 NUM_BANKS(ADDR_SURF_16_BANK));
3367 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3369 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3370 				 NUM_BANKS(ADDR_SURF_8_BANK));
3371 
3372 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3373 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3374 			    reg_offset != 23)
3375 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3376 
3377 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3378 			if (reg_offset != 7)
3379 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3380 
3381 		break;
3382 	}
3383 }
3384 
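/*
 * gfx_v8_0_select_se_sh - program GRBM_GFX_INDEX to steer register access
 *
 * Selects which shader engine (SE), shader array (SH) and instance the
 * following indexed register reads/writes apply to.  Passing 0xffffffff
 * for a field sets the corresponding broadcast bit instead, so the access
 * targets every SE/SH/instance at once.
 */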
3385 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3386 				  u32 se_num, u32 sh_num, u32 instance)
3387 {
3388 	u32 data;
3389 
3390 	if (instance == 0xffffffff)
3391 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3392 	else
3393 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3394 
3395 	if (se_num == 0xffffffff)
3396 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3397 	else
3398 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3399 
3400 	if (sh_num == 0xffffffff)
3401 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3402 	else
3403 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3404 
3405 	WREG32(mmGRBM_GFX_INDEX, data);
3406 }
3407 
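/*
 * Return a bitmap of the render backends (RBs) active on the currently
 * selected SE/SH: OR together the fused-off (CC) and user-disabled
 * (GC_USER) RB masks, invert the result, and trim it to the number of
 * backends per shader array.
 */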
3408 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3409 {
3410 	u32 data, mask;
3411 
3412 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3413 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3414 
3415 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3416 
3417 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3418 					 adev->gfx.config.max_sh_per_se);
3419 
3420 	return (~data) & mask;
3421 }
3422 
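/*
 * Per-ASIC golden PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 values for
 * the fully-enabled (non-harvested) RB configuration.
 */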
3423 static void
3424 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3425 {
3426 	switch (adev->asic_type) {
3427 	case CHIP_FIJI:
3428 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3429 			  RB_XSEL2(1) | PKR_MAP(2) |
3430 			  PKR_XSEL(1) | PKR_YSEL(1) |
3431 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3432 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3433 			   SE_PAIR_YSEL(2);
3434 		break;
3435 	case CHIP_TONGA:
3436 	case CHIP_POLARIS10:
3437 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3438 			  SE_XSEL(1) | SE_YSEL(1);
3439 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3440 			   SE_PAIR_YSEL(2);
3441 		break;
3442 	case CHIP_TOPAZ:
3443 	case CHIP_CARRIZO:
3444 		*rconf |= RB_MAP_PKR0(2);
3445 		*rconf1 |= 0x0;
3446 		break;
3447 	case CHIP_POLARIS11:
3448 	case CHIP_POLARIS12:
3449 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3450 			  SE_XSEL(1) | SE_YSEL(1);
3451 		*rconf1 |= 0x0;
3452 		break;
3453 	case CHIP_STONEY:
3454 		*rconf |= 0x0;
3455 		*rconf1 |= 0x0;
3456 		break;
3457 	default:
3458 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3459 		break;
3460 	}
3461 }
3462 
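/*
 * On harvested parts some RBs are fused off, so the golden raster config
 * cannot be used as-is.  For each SE this rebuilds the SE/PKR/RB mapping
 * fields so the rasterizer only targets backends that are actually
 * present, then programs the per-SE config through GRBM_GFX_INDEX.
 */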
3463 static void
3464 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3465 					u32 raster_config, u32 raster_config_1,
3466 					unsigned rb_mask, unsigned num_rb)
3467 {
3468 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3469 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3470 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3471 	unsigned rb_per_se = num_rb / num_se;
3472 	unsigned se_mask[4];
3473 	unsigned se;
3474 
3475 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3476 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3477 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3478 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3479 
3480 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3481 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3482 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3483 
3484 	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3485 			     (!se_mask[2] && !se_mask[3]))) {
3486 		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3487 
3488 		if (!se_mask[0] && !se_mask[1]) {
3489 			raster_config_1 |=
3490 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3491 		} else {
3492 			raster_config_1 |=
3493 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3494 		}
3495 	}
3496 
3497 	for (se = 0; se < num_se; se++) {
3498 		unsigned raster_config_se = raster_config;
3499 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3500 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3501 		int idx = (se / 2) * 2;
3502 
3503 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3504 			raster_config_se &= ~SE_MAP_MASK;
3505 
3506 			if (!se_mask[idx]) {
3507 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3508 			} else {
3509 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3510 			}
3511 		}
3512 
3513 		pkr0_mask &= rb_mask;
3514 		pkr1_mask &= rb_mask;
3515 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3516 			raster_config_se &= ~PKR_MAP_MASK;
3517 
3518 			if (!pkr0_mask) {
3519 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3520 			} else {
3521 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3522 			}
3523 		}
3524 
3525 		if (rb_per_se >= 2) {
3526 			unsigned rb0_mask = 1 << (se * rb_per_se);
3527 			unsigned rb1_mask = rb0_mask << 1;
3528 
3529 			rb0_mask &= rb_mask;
3530 			rb1_mask &= rb_mask;
3531 			if (!rb0_mask || !rb1_mask) {
3532 				raster_config_se &= ~RB_MAP_PKR0_MASK;
3533 
3534 				if (!rb0_mask) {
3535 					raster_config_se |=
3536 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3537 				} else {
3538 					raster_config_se |=
3539 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3540 				}
3541 			}
3542 
3543 			if (rb_per_se > 2) {
3544 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3545 				rb1_mask = rb0_mask << 1;
3546 				rb0_mask &= rb_mask;
3547 				rb1_mask &= rb_mask;
3548 				if (!rb0_mask || !rb1_mask) {
3549 					raster_config_se &= ~RB_MAP_PKR1_MASK;
3550 
3551 					if (!rb0_mask) {
3552 						raster_config_se |=
3553 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3554 					} else {
3555 						raster_config_se |=
3556 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3557 					}
3558 				}
3559 			}
3560 		}
3561 
3562 		/* GRBM_GFX_INDEX has a different offset on VI */
3563 		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3564 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3565 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3566 	}
3567 
3568 	/* GRBM_GFX_INDEX has a different offset on VI */
3569 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3570 }
3571 
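/*
 * Query the active-RB bitmap for every SE/SH, derive the global backend
 * enable mask, and program either the golden raster config (when nothing
 * is harvested or all RB/pipe slots are in use) or a per-SE harvested
 * config.  The resulting values are cached in adev->gfx.config so they
 * can later be reported to userspace.
 */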
3572 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3573 {
3574 	int i, j;
3575 	u32 data;
3576 	u32 raster_config = 0, raster_config_1 = 0;
3577 	u32 active_rbs = 0;
3578 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3579 					adev->gfx.config.max_sh_per_se;
3580 	unsigned num_rb_pipes;
3581 
3582 	mutex_lock(&adev->grbm_idx_mutex);
3583 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3584 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3585 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3586 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3587 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3588 					       rb_bitmap_width_per_sh);
3589 		}
3590 	}
3591 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3592 
3593 	adev->gfx.config.backend_enable_mask = active_rbs;
3594 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3595 
3596 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3597 			     adev->gfx.config.max_shader_engines, 16);
3598 
3599 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3600 
3601 	if (!adev->gfx.config.backend_enable_mask ||
3602 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3603 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3604 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3605 	} else {
3606 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3607 							adev->gfx.config.backend_enable_mask,
3608 							num_rb_pipes);
3609 	}
3610 
3611 	/* cache the values for userspace */
3612 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3613 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3614 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3615 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3616 				RREG32(mmCC_RB_BACKEND_DISABLE);
3617 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3618 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3619 			adev->gfx.config.rb_config[i][j].raster_config =
3620 				RREG32(mmPA_SC_RASTER_CONFIG);
3621 			adev->gfx.config.rb_config[i][j].raster_config_1 =
3622 				RREG32(mmPA_SC_RASTER_CONFIG_1);
3623 		}
3624 	}
3625 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3626 	mutex_unlock(&adev->grbm_idx_mutex);
3627 }
3628 
3629 /**
3630  * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
3631  *
3632  * @adev: amdgpu_device pointer
3633  *
3634  * Initialize the SH_MEM config and aperture registers for the compute VMIDs
3635  *
3636  */
3637 #define DEFAULT_SH_MEM_BASES	(0x6000)
3638 #define FIRST_COMPUTE_VMID	(8)
3639 #define LAST_COMPUTE_VMID	(16)
3640 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3641 {
3642 	int i;
3643 	uint32_t sh_mem_config;
3644 	uint32_t sh_mem_bases;
3645 
3646 	/*
3647 	 * Configure apertures:
3648 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3649 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3650 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3651 	 */
3652 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3653 
3654 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3655 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3656 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3657 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3658 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3659 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3660 
3661 	mutex_lock(&adev->srbm_mutex);
3662 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3663 		vi_srbm_select(adev, 0, 0, 0, i);
3664 		/* CP and shaders */
3665 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
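		/* APE1 base above its limit leaves the aperture disabled */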
3666 		WREG32(mmSH_MEM_APE1_BASE, 1);
3667 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3668 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3669 	}
3670 	vi_srbm_select(adev, 0, 0, 0, 0);
3671 	mutex_unlock(&adev->srbm_mutex);
3672 }
3673 
3674 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3675 {
3676 	switch (adev->asic_type) {
3677 	default:
3678 		adev->gfx.config.double_offchip_lds_buf = 1;
3679 		break;
3680 	case CHIP_CARRIZO:
3681 	case CHIP_STONEY:
3682 		adev->gfx.config.double_offchip_lds_buf = 0;
3683 		break;
3684 	}
3685 }
3686 
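/*
 * One-time golden setup of the gfx block, run during gfx hw init:
 * global addressing config, tiling tables, RB setup, per-VMID SH_MEM
 * apertures, and the SC FIFO sizes.
 */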
3687 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3688 {
3689 	u32 tmp, sh_static_mem_cfg;
3690 	int i;
3691 
3692 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3693 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3694 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3695 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3696 
3697 	gfx_v8_0_tiling_mode_table_init(adev);
3698 	gfx_v8_0_setup_rb(adev);
3699 	gfx_v8_0_get_cu_info(adev);
3700 	gfx_v8_0_config_init(adev);
3701 
3702 	/* XXX SH_MEM regs */
3703 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3704 	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3705 				   SWIZZLE_ENABLE, 1);
3706 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3707 				   ELEMENT_SIZE, 1);
3708 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3709 				   INDEX_STRIDE, 3);
3710 	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3711 
3712 	mutex_lock(&adev->srbm_mutex);
3713 	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3714 		vi_srbm_select(adev, 0, 0, 0, i);
3715 		/* CP and shaders */
3716 		if (i == 0) {
3717 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3718 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3719 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3720 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3721 			WREG32(mmSH_MEM_CONFIG, tmp);
3722 			WREG32(mmSH_MEM_BASES, 0);
3723 		} else {
3724 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3725 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3726 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3727 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3728 			WREG32(mmSH_MEM_CONFIG, tmp);
3729 			tmp = adev->mc.shared_aperture_start >> 48;
3730 			WREG32(mmSH_MEM_BASES, tmp);
3731 		}
3732 
3733 		WREG32(mmSH_MEM_APE1_BASE, 1);
3734 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3735 	}
3736 	vi_srbm_select(adev, 0, 0, 0, 0);
3737 	mutex_unlock(&adev->srbm_mutex);
3738 
3739 	gfx_v8_0_init_compute_vmid(adev);
3740 
3741 	mutex_lock(&adev->grbm_idx_mutex);
3742 	/*
3743 	 * make sure that the following register writes are broadcast
3744 	 * to all the shaders
3745 	 */
3746 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3747 
3748 	WREG32(mmPA_SC_FIFO_SIZE,
3749 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3750 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3751 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3752 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3753 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3754 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3755 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3756 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3757 
3758 	tmp = RREG32(mmSPI_ARB_PRIORITY);
3759 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3760 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3761 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3762 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3763 	WREG32(mmSPI_ARB_PRIORITY, tmp);
3764 
3765 	mutex_unlock(&adev->grbm_idx_mutex);
3767 }
3768 
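/*
 * Poll (up to adev->usec_timeout) until the RLC serdes master units
 * report idle: first the per-CU masters on every SE/SH, then the
 * non-CU (SE/GC/TC) masters.
 */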
3769 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3770 {
3771 	u32 i, j, k;
3772 	u32 mask;
3773 
3774 	mutex_lock(&adev->grbm_idx_mutex);
3775 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3776 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3777 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3778 			for (k = 0; k < adev->usec_timeout; k++) {
3779 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3780 					break;
3781 				udelay(1);
3782 			}
3783 		}
3784 	}
3785 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3786 	mutex_unlock(&adev->grbm_idx_mutex);
3787 
3788 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3789 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3790 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3791 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3792 	for (k = 0; k < adev->usec_timeout; k++) {
3793 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3794 			break;
3795 		udelay(1);
3796 	}
3797 }
3798 
3799 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3800 					       bool enable)
3801 {
3802 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3803 
3804 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3805 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3806 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3807 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3808 
3809 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3810 }
3811 
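/* Point the RLC at the GPU address and size of the clear state indirect buffer. */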
3812 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3813 {
3814 	/* csib */
3815 	WREG32(mmRLC_CSIB_ADDR_HI,
3816 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3817 	WREG32(mmRLC_CSIB_ADDR_LO,
3818 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3819 	WREG32(mmRLC_CSIB_LENGTH,
3820 			adev->gfx.rlc.clear_state_size);
3821 }
3822 
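/*
 * Walk the RLC "register list format" blob from ind_offset to list_size:
 * record where each indirect block starts in ind_start_offsets[], collect
 * the distinct index-register offsets it references into unique_indices[],
 * and rewrite each reference in place as an index into that table.
 * 0xFFFFFFFF terminates a block.
 */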
3823 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3824 				int ind_offset,
3825 				int list_size,
3826 				int *unique_indices,
3827 				int *indices_count,
3828 				int max_indices,
3829 				int *ind_start_offsets,
3830 				int *offset_count,
3831 				int max_offset)
3832 {
3833 	int indices;
3834 	bool new_entry = true;
3835 
3836 	for (; ind_offset < list_size; ind_offset++) {
3837 
3838 		if (new_entry) {
3839 			new_entry = false;
3840 			ind_start_offsets[*offset_count] = ind_offset;
3841 			*offset_count = *offset_count + 1;
3842 			BUG_ON(*offset_count >= max_offset);
3843 		}
3844 
3845 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3846 			new_entry = true;
3847 			continue;
3848 		}
3849 
3850 		ind_offset += 2;
3851 
3852 		/* look for a matching index */
3853 		for (indices = 0;
3854 			indices < *indices_count;
3855 			indices++) {
3856 			if (unique_indices[indices] ==
3857 				register_list_format[ind_offset])
3858 				break;
3859 		}
3860 
3861 		if (indices >= *indices_count) {
3862 			unique_indices[*indices_count] =
3863 				register_list_format[ind_offset];
3864 			indices = *indices_count;
3865 			*indices_count = *indices_count + 1;
3866 			BUG_ON(*indices_count >= max_indices);
3867 		}
3868 
3869 		register_list_format[ind_offset] = indices;
3870 	}
3871 }
3872 
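/*
 * Program the RLC save/restore machine (SRM): upload the direct register
 * restore list into ARAM, the re-indexed indirect format list and its
 * starting offsets into GPM scratch, and the unique index registers into
 * the SRM index control register pairs.
 */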
3873 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3874 {
3875 	int i, temp, data;
3876 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3877 	int indices_count = 0;
3878 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3879 	int offset_count = 0;
3880 
3881 	int list_size;
3882 	unsigned int *register_list_format =
3883 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3884 	if (!register_list_format)
3885 		return -ENOMEM;
3886 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3887 			adev->gfx.rlc.reg_list_format_size_bytes);
3888 
3889 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3890 				RLC_FormatDirectRegListLength,
3891 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3892 				unique_indices,
3893 				&indices_count,
3894 				ARRAY_SIZE(unique_indices),
3895 				indirect_start_offsets,
3896 				&offset_count,
3897 				ARRAY_SIZE(indirect_start_offsets));
3898 
3899 	/* save and restore list */
3900 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3901 
3902 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3903 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3904 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3905 
3906 	/* indirect list */
3907 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3908 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3909 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3910 
3911 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3912 	list_size = list_size >> 1;
3913 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3914 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3915 
3916 	/* write the starting offsets of the indirect register blocks */
3917 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3918 		adev->gfx.rlc.starting_offsets_start);
3919 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3920 		WREG32(mmRLC_GPM_SCRATCH_DATA,
3921 				indirect_start_offsets[i]);
3922 
3923 	/* unique indices */
3924 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3925 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3926 	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3927 		if (unique_indices[i] != 0) {
3928 			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3929 			WREG32(data + i, unique_indices[i] >> 20);
3930 		}
3931 	}
3932 	kfree(register_list_format);
3933 
3934 	return 0;
3935 }
3936 
3937 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3938 {
3939 	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3940 }
3941 
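/*
 * Program the idle poll count and the RLC power-gating delays (power
 * up/down, command propagation, memory sleep and serdes command delays)
 * used when the RLC gates the gfx block.
 */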
3942 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3943 {
3944 	uint32_t data;
3945 
3946 	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3947 
3948 	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3949 	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3950 	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3951 	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3952 	WREG32(mmRLC_PG_DELAY, data);
3953 
3954 	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3955 	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3957 }
3958 
3959 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3960 						bool enable)
3961 {
3962 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3963 }
3964 
3965 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3966 						  bool enable)
3967 {
3968 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3969 }
3970 
3971 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3972 {
3973 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
3974 }
3975 
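/*
 * Per-ASIC power-gating init: parts with PG support get the CSB and the
 * save/restore list programmed; the APUs (carrizo/stoney) additionally
 * need the CP jump table address and the always-on CU mask.
 */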
3976 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3977 {
3978 	if ((adev->asic_type == CHIP_CARRIZO) ||
3979 	    (adev->asic_type == CHIP_STONEY)) {
3980 		gfx_v8_0_init_csb(adev);
3981 		gfx_v8_0_init_save_restore_list(adev);
3982 		gfx_v8_0_enable_save_restore_machine(adev);
3983 		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3984 		gfx_v8_0_init_power_gating(adev);
3985 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3986 	} else if ((adev->asic_type == CHIP_POLARIS11) ||
3987 		   (adev->asic_type == CHIP_POLARIS12)) {
3988 		gfx_v8_0_init_csb(adev);
3989 		gfx_v8_0_init_save_restore_list(adev);
3990 		gfx_v8_0_enable_save_restore_machine(adev);
3991 		gfx_v8_0_init_power_gating(adev);
3992 	}
3994 }
3995 
3996 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3997 {
3998 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
3999 
4000 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4001 	gfx_v8_0_wait_for_rlc_serdes(adev);
4002 }
4003 
4004 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4005 {
4006 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4007 	udelay(50);
4008 
4009 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4010 	udelay(50);
4011 }
4012 
4013 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4014 {
4015 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4016 
4017 	/* APUs (e.g. carrizo) enable the CP interrupt only after the CP is initialized */
4018 	if (!(adev->flags & AMD_IS_APU))
4019 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4020 
4021 	udelay(50);
4022 }
4023 
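/*
 * Legacy (non-SMU) RLC microcode load: stream the ucode dwords through
 * the auto-incrementing GPM ucode port, then write the firmware version
 * to the address register as the final step.
 */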
4024 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4025 {
4026 	const struct rlc_firmware_header_v2_0 *hdr;
4027 	const __le32 *fw_data;
4028 	unsigned i, fw_size;
4029 
4030 	if (!adev->gfx.rlc_fw)
4031 		return -EINVAL;
4032 
4033 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4034 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4035 
4036 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4037 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4038 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4039 
4040 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4041 	for (i = 0; i < fw_size; i++)
4042 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4043 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4044 
4045 	return 0;
4046 }
4047 
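/*
 * Full RLC bring-up: stop the RLC, disable clock and power gating, reset
 * the RLC, redo the PG init, and (when not handled by the SMU) reload
 * the microcode before starting the RLC again.
 */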
4048 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4049 {
4050 	int r;
4051 	u32 tmp;
4052 
4053 	gfx_v8_0_rlc_stop(adev);
4054 
4055 	/* disable CG */
4056 	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4057 	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4058 		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4059 	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4060 	if (adev->asic_type == CHIP_POLARIS11 ||
4061 	    adev->asic_type == CHIP_POLARIS10 ||
4062 	    adev->asic_type == CHIP_POLARIS12) {
4063 		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4064 		tmp &= ~0x3;
4065 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4066 	}
4067 
4068 	/* disable PG */
4069 	WREG32(mmRLC_PG_CNTL, 0);
4070 
4071 	gfx_v8_0_rlc_reset(adev);
4072 	gfx_v8_0_init_pg(adev);
4073 
4074 	if (!adev->pp_enabled) {
4075 		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4076 			/* legacy rlc firmware loading */
4077 			r = gfx_v8_0_rlc_load_microcode(adev);
4078 			if (r)
4079 				return r;
4080 		} else {
4081 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4082 							AMDGPU_UCODE_ID_RLC_G);
4083 			if (r)
4084 				return -EINVAL;
4085 		}
4086 	}
4087 
4088 	gfx_v8_0_rlc_start(adev);
4089 
4090 	return 0;
4091 }
4092 
4093 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4094 {
4095 	int i;
4096 	u32 tmp = RREG32(mmCP_ME_CNTL);
4097 
4098 	if (enable) {
4099 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4100 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4101 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4102 	} else {
4103 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4104 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4105 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4106 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4107 			adev->gfx.gfx_ring[i].ready = false;
4108 	}
4109 	WREG32(mmCP_ME_CNTL, tmp);
4110 	udelay(50);
4111 }
4112 
4113 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4114 {
4115 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4116 	const struct gfx_firmware_header_v1_0 *ce_hdr;
4117 	const struct gfx_firmware_header_v1_0 *me_hdr;
4118 	const __le32 *fw_data;
4119 	unsigned i, fw_size;
4120 
4121 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4122 		return -EINVAL;
4123 
4124 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4125 		adev->gfx.pfp_fw->data;
4126 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4127 		adev->gfx.ce_fw->data;
4128 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4129 		adev->gfx.me_fw->data;
4130 
4131 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4132 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4133 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4134 
4135 	gfx_v8_0_cp_gfx_enable(adev, false);
4136 
4137 	/* PFP */
4138 	fw_data = (const __le32 *)
4139 		(adev->gfx.pfp_fw->data +
4140 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4141 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4142 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4143 	for (i = 0; i < fw_size; i++)
4144 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4145 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4146 
4147 	/* CE */
4148 	fw_data = (const __le32 *)
4149 		(adev->gfx.ce_fw->data +
4150 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4151 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4152 	WREG32(mmCP_CE_UCODE_ADDR, 0);
4153 	for (i = 0; i < fw_size; i++)
4154 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4155 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4156 
4157 	/* ME */
4158 	fw_data = (const __le32 *)
4159 		(adev->gfx.me_fw->data +
4160 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4161 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4162 	WREG32(mmCP_ME_RAM_WADDR, 0);
4163 	for (i = 0; i < fw_size; i++)
4164 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4165 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4166 
4167 	return 0;
4168 }
4169 
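/*
 * Clear-state buffer accounting: each SECT_CONTEXT extent costs
 * 2 + reg_count dwords (one PACKET3_SET_CONTEXT_REG header, one start
 * offset, then reg_count register values).  For example, an extent with
 * reg_count == 5 contributes 7 dwords.  The fixed overhead is 2 dwords
 * for the clear-state preamble begin, 3 for context control, 4 for the
 * two raster-config registers, 2 for the preamble end and 2 for the
 * CLEAR_STATE packet itself, matching what cp_gfx_start() emits below.
 */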
4170 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4171 {
4172 	u32 count = 0;
4173 	const struct cs_section_def *sect = NULL;
4174 	const struct cs_extent_def *ext = NULL;
4175 
4176 	/* begin clear state */
4177 	count += 2;
4178 	/* context control state */
4179 	count += 3;
4180 
4181 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4182 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4183 			if (sect->id == SECT_CONTEXT)
4184 				count += 2 + ext->reg_count;
4185 			else
4186 				return 0;
4187 		}
4188 	}
4189 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4190 	count += 4;
4191 	/* end clear state */
4192 	count += 2;
4193 	/* clear state */
4194 	count += 2;
4195 
4196 	return count;
4197 }
4198 
4199 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4200 {
4201 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4202 	const struct cs_section_def *sect = NULL;
4203 	const struct cs_extent_def *ext = NULL;
4204 	int r, i;
4205 
4206 	/* init the CP */
4207 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4208 	WREG32(mmCP_ENDIAN_SWAP, 0);
4209 	WREG32(mmCP_DEVICE_ID, 1);
4210 
4211 	gfx_v8_0_cp_gfx_enable(adev, true);
4212 
4213 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4214 	if (r) {
4215 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4216 		return r;
4217 	}
4218 
4219 	/* clear state buffer */
4220 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4221 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4222 
4223 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4224 	amdgpu_ring_write(ring, 0x80000000);
4225 	amdgpu_ring_write(ring, 0x80000000);
4226 
4227 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4228 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4229 			if (sect->id == SECT_CONTEXT) {
4230 				amdgpu_ring_write(ring,
4231 				       PACKET3(PACKET3_SET_CONTEXT_REG,
4232 					       ext->reg_count));
4233 				amdgpu_ring_write(ring,
4234 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4235 				for (i = 0; i < ext->reg_count; i++)
4236 					amdgpu_ring_write(ring, ext->extent[i]);
4237 			}
4238 		}
4239 	}
4240 
4241 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4242 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4243 	switch (adev->asic_type) {
4244 	case CHIP_TONGA:
4245 	case CHIP_POLARIS10:
4246 		amdgpu_ring_write(ring, 0x16000012);
4247 		amdgpu_ring_write(ring, 0x0000002A);
4248 		break;
4249 	case CHIP_POLARIS11:
4250 	case CHIP_POLARIS12:
4251 		amdgpu_ring_write(ring, 0x16000012);
4252 		amdgpu_ring_write(ring, 0x00000000);
4253 		break;
4254 	case CHIP_FIJI:
4255 		amdgpu_ring_write(ring, 0x3a00161a);
4256 		amdgpu_ring_write(ring, 0x0000002e);
4257 		break;
4258 	case CHIP_CARRIZO:
4259 		amdgpu_ring_write(ring, 0x00000002);
4260 		amdgpu_ring_write(ring, 0x00000000);
4261 		break;
4262 	case CHIP_TOPAZ:
4263 		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4264 				0x00000000 : 0x00000002);
4265 		amdgpu_ring_write(ring, 0x00000000);
4266 		break;
4267 	case CHIP_STONEY:
4268 		amdgpu_ring_write(ring, 0x00000000);
4269 		amdgpu_ring_write(ring, 0x00000000);
4270 		break;
4271 	default:
4272 		BUG();
4273 	}
4274 
4275 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4276 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4277 
4278 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4279 	amdgpu_ring_write(ring, 0);
4280 
4281 	/* init the CE partitions */
4282 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4283 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4284 	amdgpu_ring_write(ring, 0x8000);
4285 	amdgpu_ring_write(ring, 0x8000);
4286 
4287 	amdgpu_ring_commit(ring);
4288 
4289 	return 0;
4290 }
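
/*
 * Doorbell plumbing for the gfx ring: CP_RB_DOORBELL_CONTROL points the
 * CP at the ring's doorbell slot, and on dGPUs the LOWER/UPPER range
 * registers additionally fence off which doorbell offsets the gfx CP may
 * claim (APUs skip the range setup, and Topaz has no gfx doorbells at
 * all).
 */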
4291 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4292 {
4293 	u32 tmp;
4294 	/* no gfx doorbells on iceland */
4295 	if (adev->asic_type == CHIP_TOPAZ)
4296 		return;
4297 
4298 	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4299 
4300 	if (ring->use_doorbell) {
4301 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4302 				DOORBELL_OFFSET, ring->doorbell_index);
4303 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4304 						DOORBELL_HIT, 0);
4305 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4306 					    DOORBELL_EN, 1);
4307 	} else {
4308 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4309 	}
4310 
4311 	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4312 
4313 	if (adev->flags & AMD_IS_APU)
4314 		return;
4315 
4316 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4317 					DOORBELL_RANGE_LOWER,
4318 					AMDGPU_DOORBELL_GFX_RING0);
4319 	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4320 
4321 	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4322 		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4323 }
4324 
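/*
 * Ring buffer sizing: CP_RB0_CNTL.RB_BUFSZ encodes the ring size as a
 * power of two, with the number of dwords = 2^(RB_BUFSZ + 1).  E.g. a
 * 4KB ring gives rb_bufsz = order_base_2(4096 / 8) = 9, i.e. 2^10 = 1024
 * dwords.  RB_RPTR_WR_ENA is held while the pointers are zeroed so the
 * read-pointer override takes effect, then dropped when CNTL is
 * rewritten below.
 */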
4325 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4326 {
4327 	struct amdgpu_ring *ring;
4328 	u32 tmp;
4329 	u32 rb_bufsz;
4330 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4331 	int r;
4332 
4333 	/* Set the write pointer delay */
4334 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4335 
4336 	/* set the RB to use vmid 0 */
4337 	WREG32(mmCP_RB_VMID, 0);
4338 
4339 	/* Set ring buffer size */
4340 	ring = &adev->gfx.gfx_ring[0];
4341 	rb_bufsz = order_base_2(ring->ring_size / 8);
4342 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4343 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4344 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4345 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4346 #ifdef __BIG_ENDIAN
4347 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4348 #endif
4349 	WREG32(mmCP_RB0_CNTL, tmp);
4350 
4351 	/* Initialize the ring buffer's read and write pointers */
4352 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4353 	ring->wptr = 0;
4354 	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4355 
4356 	/* set the wb address whether it's enabled or not */
4357 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4358 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4359 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4360 
4361 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4362 	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4363 	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4364 	mdelay(1);
4365 	WREG32(mmCP_RB0_CNTL, tmp);
4366 
4367 	rb_addr = ring->gpu_addr >> 8;
4368 	WREG32(mmCP_RB0_BASE, rb_addr);
4369 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4370 
4371 	gfx_v8_0_set_cpg_door_bell(adev, ring);
4372 	/* start the ring */
4373 	amdgpu_ring_clear_ring(ring);
4374 	gfx_v8_0_cp_gfx_start(adev);
4375 	ring->ready = true;
4376 	r = amdgpu_ring_test_ring(ring);
4377 	if (r)
4378 		ring->ready = false;
4379 
4380 	return r;
4381 }
4382 
4383 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4384 {
4385 	int i;
4386 
4387 	if (enable) {
4388 		WREG32(mmCP_MEC_CNTL, 0);
4389 	} else {
4390 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4391 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4392 			adev->gfx.compute_ring[i].ready = false;
4393 		adev->gfx.kiq.ring.ready = false;
4394 	}
4395 	udelay(50);
4396 }
4397 
4398 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4399 {
4400 	const struct gfx_firmware_header_v1_0 *mec_hdr;
4401 	const __le32 *fw_data;
4402 	unsigned i, fw_size;
4403 
4404 	if (!adev->gfx.mec_fw)
4405 		return -EINVAL;
4406 
4407 	gfx_v8_0_cp_compute_enable(adev, false);
4408 
4409 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4410 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4411 
4412 	fw_data = (const __le32 *)
4413 		(adev->gfx.mec_fw->data +
4414 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4415 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4416 
4417 	/* MEC1 */
4418 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4419 	for (i = 0; i < fw_size; i++)
4420 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4421 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4422 
4423 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4424 	if (adev->gfx.mec2_fw) {
4425 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4426 
4427 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4428 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4429 
4430 		fw_data = (const __le32 *)
4431 			(adev->gfx.mec2_fw->data +
4432 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4433 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4434 
4435 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4436 		for (i = 0; i < fw_size; i++)
4437 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4438 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4439 	}
4440 
4441 	return 0;
4442 }
4443 
4444 /* KIQ functions */
4445 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4446 {
4447 	uint32_t tmp;
4448 	struct amdgpu_device *adev = ring->adev;
4449 
4450 	/* tell RLC which is KIQ queue */
4451 	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4452 	tmp &= 0xffffff00;
4453 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4454 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4455 	tmp |= 0x80;
4456 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4457 }
4458 
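/*
 * Compute queues are brought up through the KIQ rather than by MMIO:
 * a SET_RESOURCES packet hands the KIQ the bitmap of HQD slots it owns,
 * then one MAP_QUEUES packet per compute ring points the firmware at
 * that ring's MQD and wptr writeback address.  Completion is detected by
 * appending a scratch-register write and polling for the 0xDEADBEEF
 * token, the same trick the ring tests use.
 */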
4459 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4460 {
4461 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4462 	uint32_t scratch, tmp = 0;
4463 	uint64_t queue_mask = 0;
4464 	int r, i;
4465 
4466 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4467 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4468 			continue;
4469 
4470 		/* This situation may be hit in the future if a new HW
4471 		 * generation exposes more than 64 queues. If so, the
4472 		 * definition of queue_mask needs updating */
4473 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4474 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4475 			break;
4476 		}
4477 
4478 		queue_mask |= (1ull << i);
4479 	}
4480 
4481 	r = amdgpu_gfx_scratch_get(adev, &scratch);
4482 	if (r) {
4483 		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4484 		return r;
4485 	}
4486 	WREG32(scratch, 0xCAFEDEAD);
4487 
4488 	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4489 	if (r) {
4490 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4491 		amdgpu_gfx_scratch_free(adev, scratch);
4492 		return r;
4493 	}
4494 	/* set resources */
4495 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4496 	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4497 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4498 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4499 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4500 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4501 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4502 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4503 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4504 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4505 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4506 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4507 
4508 		/* map queues */
4509 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4510 		/* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
4511 		amdgpu_ring_write(kiq_ring,
4512 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4513 		amdgpu_ring_write(kiq_ring,
4514 				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4515 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4516 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4517 				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4518 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4519 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4520 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4521 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4522 	}
4523 	/* write to scratch for completion */
4524 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4525 	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4526 	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4527 	amdgpu_ring_commit(kiq_ring);
4528 
4529 	for (i = 0; i < adev->usec_timeout; i++) {
4530 		tmp = RREG32(scratch);
4531 		if (tmp == 0xDEADBEEF)
4532 			break;
4533 		DRM_UDELAY(1);
4534 	}
4535 	if (i >= adev->usec_timeout) {
4536 		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4537 			  scratch, tmp);
4538 		r = -EINVAL;
4539 	}
4540 	amdgpu_gfx_scratch_free(adev, scratch);
4541 
4542 	return r;
4543 }
4544 
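/*
 * If the selected HQD is active, request a dequeue (the req argument
 * selects the dequeue type) and poll CP_HQD_ACTIVE until the firmware
 * drains the queue or the usec timeout expires, then clear the queue
 * pointers.  Callers in this file select the target queue via
 * vi_srbm_select() under srbm_mutex before calling.
 */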
4545 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4546 {
4547 	int i, r = 0;
4548 
4549 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4550 		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4551 		for (i = 0; i < adev->usec_timeout; i++) {
4552 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4553 				break;
4554 			udelay(1);
4555 		}
4556 		if (i == adev->usec_timeout)
4557 			r = -ETIMEDOUT;
4558 	}
4559 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4560 	WREG32(mmCP_HQD_PQ_RPTR, 0);
4561 	WREG32(mmCP_HQD_PQ_WPTR, 0);
4562 
4563 	return r;
4564 }
4565 
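/*
 * The MQD (memory queue descriptor) is a CPU-side image of the HQD
 * registers; it is filled in here and only hits the hardware when
 * gfx_v8_0_mqd_commit() (or the KIQ) programs it.  Note the EOP_SIZE
 * encoding of 2^(EOP_SIZE+1) dwords: with GFX8_MEC_HPD_SIZE = 2048
 * bytes (512 dwords), order_base_2(2048 / 4) - 1 = 8, and 2^(8+1) = 512
 * dwords as expected.
 */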
4566 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4567 {
4568 	struct amdgpu_device *adev = ring->adev;
4569 	struct vi_mqd *mqd = ring->mqd_ptr;
4570 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4571 	uint32_t tmp;
4572 
4573 	mqd->header = 0xC0310800;
4574 	mqd->compute_pipelinestat_enable = 0x00000001;
4575 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4576 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4577 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4578 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4579 	mqd->compute_misc_reserved = 0x00000003;
4580 	if (!(adev->flags & AMD_IS_APU)) {
4581 		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4582 					     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4583 		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4584 					     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4585 	}
4586 	eop_base_addr = ring->eop_gpu_addr >> 8;
4587 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4588 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4589 
4590 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4591 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4592 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4593 			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4594 
4595 	mqd->cp_hqd_eop_control = tmp;
4596 
4597 	/* enable doorbell? */
4598 	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4599 			    CP_HQD_PQ_DOORBELL_CONTROL,
4600 			    DOORBELL_EN,
4601 			    ring->use_doorbell ? 1 : 0);
4602 
4603 	mqd->cp_hqd_pq_doorbell_control = tmp;
4604 
4605 	/* set the pointer to the MQD */
4606 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4607 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4608 
4609 	/* set MQD vmid to 0 */
4610 	tmp = RREG32(mmCP_MQD_CONTROL);
4611 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4612 	mqd->cp_mqd_control = tmp;
4613 
4614 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4615 	hqd_gpu_addr = ring->gpu_addr >> 8;
4616 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4617 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4618 
4619 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4620 	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4621 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4622 			    (order_base_2(ring->ring_size / 4) - 1));
4623 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4624 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4625 #ifdef __BIG_ENDIAN
4626 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4627 #endif
4628 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4629 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4630 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4631 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4632 	mqd->cp_hqd_pq_control = tmp;
4633 
4634 	/* set the wb address whether it's enabled or not */
4635 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4636 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4637 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4638 		upper_32_bits(wb_gpu_addr) & 0xffff;
4639 
4640 	/* only used if wptr polling is enabled via CP_PQ_WPTR_POLL_CNTL.EN */
4641 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4642 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4643 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4644 
4645 	tmp = 0;
4646 	/* enable the doorbell if requested */
4647 	if (ring->use_doorbell) {
4648 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4649 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4650 				DOORBELL_OFFSET, ring->doorbell_index);
4651 
4652 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4653 					 DOORBELL_EN, 1);
4654 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4655 					 DOORBELL_SOURCE, 0);
4656 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4657 					 DOORBELL_HIT, 0);
4658 	}
4659 
4660 	mqd->cp_hqd_pq_doorbell_control = tmp;
4661 
4662 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4663 	ring->wptr = 0;
4664 	mqd->cp_hqd_pq_wptr = ring->wptr;
4665 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4666 
4667 	/* set the vmid for the queue */
4668 	mqd->cp_hqd_vmid = 0;
4669 
4670 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4671 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4672 	mqd->cp_hqd_persistent_state = tmp;
4673 
4674 	/* set MTYPE */
4675 	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4676 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4677 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4678 	mqd->cp_hqd_ib_control = tmp;
4679 
4680 	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4681 	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4682 	mqd->cp_hqd_iq_timer = tmp;
4683 
4684 	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4685 	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4686 	mqd->cp_hqd_ctx_save_control = tmp;
4687 
4688 	/* defaults */
4689 	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4690 	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4691 	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4692 	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4693 	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4694 	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4695 	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4696 	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4697 	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4698 	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4699 	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4700 	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4701 	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4702 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4703 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4704 
4705 	/* activate the queue */
4706 	mqd->cp_hqd_active = 1;
4707 
4708 	return 0;
4709 }
4710 
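/*
 * Commit the MQD image to the hardware HQD registers.  The register file
 * is programmed in ranges so that mmCP_HQD_ACTIVE is written last; the
 * queue only goes live once everything else is in place.  The mqd_data
 * indexing works because the vi_mqd layout mirrors the HQD register
 * order starting at mmCP_MQD_BASE_ADDR.
 */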
4711 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4712 			struct vi_mqd *mqd)
4713 {
4714 	uint32_t mqd_reg;
4715 	uint32_t *mqd_data;
4716 
4717 	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4718 	mqd_data = &mqd->cp_mqd_base_addr_lo;
4719 
4720 	/* disable wptr polling */
4721 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4722 
4723 	/* program all HQD registers */
4724 	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4725 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4726 
4727 	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4728 	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4729 	 * on ASICs that do not support context-save.
4730 	 * EOP writes/reads can start anywhere in the ring.
4731 	 */
4732 	if (adev->asic_type != CHIP_TONGA) {
4733 		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4734 		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4735 		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4736 	}
4737 
4738 	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4739 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4740 
4741 	/* activate the HQD */
4742 	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4743 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4744 
4745 	return 0;
4746 }
4747 
4748 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4749 {
4750 	struct amdgpu_device *adev = ring->adev;
4751 	struct vi_mqd *mqd = ring->mqd_ptr;
4752 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4753 
4754 	gfx_v8_0_kiq_setting(ring);
4755 
4756 	if (adev->gfx.in_reset) { /* for GPU_RESET case */
4757 		/* reset MQD to a clean status */
4758 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4759 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4760 
4761 		/* reset ring buffer */
4762 		ring->wptr = 0;
4763 		amdgpu_ring_clear_ring(ring);
4764 		mutex_lock(&adev->srbm_mutex);
4765 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4766 		gfx_v8_0_mqd_commit(adev, mqd);
4767 		vi_srbm_select(adev, 0, 0, 0, 0);
4768 		mutex_unlock(&adev->srbm_mutex);
4769 	} else {
4770 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4771 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4772 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4773 		mutex_lock(&adev->srbm_mutex);
4774 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4775 		gfx_v8_0_mqd_init(ring);
4776 		gfx_v8_0_mqd_commit(adev, mqd);
4777 		vi_srbm_select(adev, 0, 0, 0, 0);
4778 		mutex_unlock(&adev->srbm_mutex);
4779 
4780 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4781 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4782 	}
4783 
4784 	return 0;
4785 }
4786 
4787 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4788 {
4789 	struct amdgpu_device *adev = ring->adev;
4790 	struct vi_mqd *mqd = ring->mqd_ptr;
4791 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4792 
4793 	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
4794 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4795 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4796 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4797 		mutex_lock(&adev->srbm_mutex);
4798 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4799 		gfx_v8_0_mqd_init(ring);
4800 		vi_srbm_select(adev, 0, 0, 0, 0);
4801 		mutex_unlock(&adev->srbm_mutex);
4802 
4803 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4804 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4805 	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
4806 		/* reset MQD to a clean status */
4807 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4808 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4809 		/* reset ring buffer */
4810 		ring->wptr = 0;
4811 		amdgpu_ring_clear_ring(ring);
4812 	} else {
4813 		amdgpu_ring_clear_ring(ring);
4814 	}
4815 	return 0;
4816 }
4817 
4818 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4819 {
4820 	if (adev->asic_type > CHIP_TONGA) {
4821 		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4822 		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4823 	}
4824 	/* enable doorbells */
4825 	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4826 }
4827 
4828 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4829 {
4830 	struct amdgpu_ring *ring = NULL;
4831 	int r = 0, i;
4832 
4833 	gfx_v8_0_cp_compute_enable(adev, true);
4834 
4835 	ring = &adev->gfx.kiq.ring;
4836 
4837 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4838 	if (unlikely(r != 0))
4839 		goto done;
4840 
4841 	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4842 	if (!r) {
4843 		r = gfx_v8_0_kiq_init_queue(ring);
4844 		amdgpu_bo_kunmap(ring->mqd_obj);
4845 		ring->mqd_ptr = NULL;
4846 	}
4847 	amdgpu_bo_unreserve(ring->mqd_obj);
4848 	if (r)
4849 		goto done;
4850 
4851 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4852 		ring = &adev->gfx.compute_ring[i];
4853 
4854 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4855 		if (unlikely(r != 0))
4856 			goto done;
4857 		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4858 		if (!r) {
4859 			r = gfx_v8_0_kcq_init_queue(ring);
4860 			amdgpu_bo_kunmap(ring->mqd_obj);
4861 			ring->mqd_ptr = NULL;
4862 		}
4863 		amdgpu_bo_unreserve(ring->mqd_obj);
4864 		if (r)
4865 			goto done;
4866 	}
4867 
4868 	gfx_v8_0_set_mec_doorbell_range(adev);
4869 
4870 	r = gfx_v8_0_kiq_kcq_enable(adev);
4871 	if (r)
4872 		goto done;
4873 
4874 	/* Test KIQ */
4875 	ring = &adev->gfx.kiq.ring;
4876 	ring->ready = true;
4877 	r = amdgpu_ring_test_ring(ring);
4878 	if (r) {
4879 		ring->ready = false;
4880 		goto done;
4881 	}
4882 
4883 	/* Test KCQs */
4884 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4885 		ring = &adev->gfx.compute_ring[i];
4886 		ring->ready = true;
4887 		r = amdgpu_ring_test_ring(ring);
4888 		if (r)
4889 			ring->ready = false;
4890 	}
4891 
4892 done:
4893 	return r;
4894 }
4895 
4896 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4897 {
4898 	int r;
4899 
4900 	if (!(adev->flags & AMD_IS_APU))
4901 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4902 
4903 	if (!adev->pp_enabled) {
4904 		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4905 			/* legacy firmware loading */
4906 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
4907 			if (r)
4908 				return r;
4909 
4910 			r = gfx_v8_0_cp_compute_load_microcode(adev);
4911 			if (r)
4912 				return r;
4913 		} else {
4914 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4915 							AMDGPU_UCODE_ID_CP_CE);
4916 			if (r)
4917 				return -EINVAL;
4918 
4919 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4920 							AMDGPU_UCODE_ID_CP_PFP);
4921 			if (r)
4922 				return -EINVAL;
4923 
4924 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4925 							AMDGPU_UCODE_ID_CP_ME);
4926 			if (r)
4927 				return -EINVAL;
4928 
4929 			if (adev->asic_type == CHIP_TOPAZ) {
4930 				r = gfx_v8_0_cp_compute_load_microcode(adev);
4931 				if (r)
4932 					return r;
4933 			} else {
4934 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4935 										 AMDGPU_UCODE_ID_CP_MEC1);
4936 				if (r)
4937 					return -EINVAL;
4938 			}
4939 		}
4940 	}
4941 
4942 	r = gfx_v8_0_cp_gfx_resume(adev);
4943 	if (r)
4944 		return r;
4945 
4946 	r = gfx_v8_0_kiq_resume(adev);
4947 	if (r)
4948 		return r;
4949 
4950 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4951 
4952 	return 0;
4953 }
4954 
4955 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4956 {
4957 	gfx_v8_0_cp_gfx_enable(adev, enable);
4958 	gfx_v8_0_cp_compute_enable(adev, enable);
4959 }
4960 
4961 static int gfx_v8_0_hw_init(void *handle)
4962 {
4963 	int r;
4964 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4965 
4966 	gfx_v8_0_init_golden_registers(adev);
4967 	gfx_v8_0_gpu_init(adev);
4968 
4969 	r = gfx_v8_0_rlc_resume(adev);
4970 	if (r)
4971 		return r;
4972 
4973 	r = gfx_v8_0_cp_resume(adev);
4974 
4975 	return r;
4976 }
4977 
4978 static int gfx_v8_0_hw_fini(void *handle)
4979 {
4980 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4981 
4982 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4983 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4984 	if (amdgpu_sriov_vf(adev)) {
4985 		pr_debug("For SRIOV client, nothing to do on hw_fini\n");
4986 		return 0;
4987 	}
4988 	gfx_v8_0_cp_enable(adev, false);
4989 	gfx_v8_0_rlc_stop(adev);
4990 
4991 	amdgpu_set_powergating_state(adev,
4992 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4993 
4994 	return 0;
4995 }
4996 
4997 static int gfx_v8_0_suspend(void *handle)
4998 {
4999 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5000 	adev->gfx.in_suspend = true;
5001 	return gfx_v8_0_hw_fini(adev);
5002 }
5003 
5004 static int gfx_v8_0_resume(void *handle)
5005 {
5006 	int r;
5007 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5008 
5009 	r = gfx_v8_0_hw_init(adev);
5010 	adev->gfx.in_suspend = false;
5011 	return r;
5012 }
5013 
5014 static bool gfx_v8_0_is_idle(void *handle)
5015 {
5016 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5017 
5018 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5019 		return false;
5020 	else
5021 		return true;
5022 }
5023 
5024 static int gfx_v8_0_wait_for_idle(void *handle)
5025 {
5026 	unsigned i;
5027 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5028 
5029 	for (i = 0; i < adev->usec_timeout; i++) {
5030 		if (gfx_v8_0_is_idle(handle))
5031 			return 0;
5032 
5033 		udelay(1);
5034 	}
5035 	return -ETIMEDOUT;
5036 }
5037 
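/*
 * Translate busy bits in GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS into the
 * matching soft-reset request fields.  Nothing is reset here; the masks
 * are stashed in adev->gfx so that pre_soft_reset/soft_reset/
 * post_soft_reset below can act on them, and the return value tells the
 * core whether a gfx soft reset is needed at all.
 */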
5038 static bool gfx_v8_0_check_soft_reset(void *handle)
5039 {
5040 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5041 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5042 	u32 tmp;
5043 
5044 	/* GRBM_STATUS */
5045 	tmp = RREG32(mmGRBM_STATUS);
5046 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5047 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5048 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5049 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5050 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5051 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5052 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5053 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5054 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5055 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5056 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5057 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5058 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5059 	}
5060 
5061 	/* GRBM_STATUS2 */
5062 	tmp = RREG32(mmGRBM_STATUS2);
5063 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5064 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5065 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5066 
5067 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5068 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5069 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5070 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5071 						SOFT_RESET_CPF, 1);
5072 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5073 						SOFT_RESET_CPC, 1);
5074 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5075 						SOFT_RESET_CPG, 1);
5076 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5077 						SOFT_RESET_GRBM, 1);
5078 	}
5079 
5080 	/* SRBM_STATUS */
5081 	tmp = RREG32(mmSRBM_STATUS);
5082 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5083 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5084 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5085 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5086 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5087 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5088 
5089 	if (grbm_soft_reset || srbm_soft_reset) {
5090 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5091 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5092 		return true;
5093 	} else {
5094 		adev->gfx.grbm_soft_reset = 0;
5095 		adev->gfx.srbm_soft_reset = 0;
5096 		return false;
5097 	}
5098 }
5099 
5100 static int gfx_v8_0_pre_soft_reset(void *handle)
5101 {
5102 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5103 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5104 
5105 	if ((!adev->gfx.grbm_soft_reset) &&
5106 	    (!adev->gfx.srbm_soft_reset))
5107 		return 0;
5108 
5109 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5110 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5111 
5112 	/* stop the rlc */
5113 	gfx_v8_0_rlc_stop(adev);
5114 
5115 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5116 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5117 		/* Disable GFX parsing/prefetching */
5118 		gfx_v8_0_cp_gfx_enable(adev, false);
5119 
5120 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5121 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5122 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5123 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5124 		int i;
5125 
5126 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5127 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5128 
5129 			mutex_lock(&adev->srbm_mutex);
5130 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5131 			gfx_v8_0_deactivate_hqd(adev, 2);
5132 			vi_srbm_select(adev, 0, 0, 0, 0);
5133 			mutex_unlock(&adev->srbm_mutex);
5134 		}
5135 		/* Disable MEC parsing/prefetching */
5136 		gfx_v8_0_cp_compute_enable(adev, false);
5137 	}
5138 
5139 	return 0;
5140 }
5141 
5142 static int gfx_v8_0_soft_reset(void *handle)
5143 {
5144 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5145 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5146 	u32 tmp;
5147 
5148 	if ((!adev->gfx.grbm_soft_reset) &&
5149 	    (!adev->gfx.srbm_soft_reset))
5150 		return 0;
5151 
5152 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5153 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5154 
5155 	if (grbm_soft_reset || srbm_soft_reset) {
5156 		tmp = RREG32(mmGMCON_DEBUG);
5157 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5158 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5159 		WREG32(mmGMCON_DEBUG, tmp);
5160 		udelay(50);
5161 	}
5162 
5163 	if (grbm_soft_reset) {
5164 		tmp = RREG32(mmGRBM_SOFT_RESET);
5165 		tmp |= grbm_soft_reset;
5166 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5167 		WREG32(mmGRBM_SOFT_RESET, tmp);
5168 		tmp = RREG32(mmGRBM_SOFT_RESET);
5169 
5170 		udelay(50);
5171 
5172 		tmp &= ~grbm_soft_reset;
5173 		WREG32(mmGRBM_SOFT_RESET, tmp);
5174 		tmp = RREG32(mmGRBM_SOFT_RESET);
5175 	}
5176 
5177 	if (srbm_soft_reset) {
5178 		tmp = RREG32(mmSRBM_SOFT_RESET);
5179 		tmp |= srbm_soft_reset;
5180 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5181 		WREG32(mmSRBM_SOFT_RESET, tmp);
5182 		tmp = RREG32(mmSRBM_SOFT_RESET);
5183 
5184 		udelay(50);
5185 
5186 		tmp &= ~srbm_soft_reset;
5187 		WREG32(mmSRBM_SOFT_RESET, tmp);
5188 		tmp = RREG32(mmSRBM_SOFT_RESET);
5189 	}
5190 
5191 	if (grbm_soft_reset || srbm_soft_reset) {
5192 		tmp = RREG32(mmGMCON_DEBUG);
5193 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5194 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5195 		WREG32(mmGMCON_DEBUG, tmp);
5196 	}
5197 
5198 	/* Wait a little for things to settle down */
5199 	udelay(50);
5200 
5201 	return 0;
5202 }
5203 
5204 static int gfx_v8_0_post_soft_reset(void *handle)
5205 {
5206 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5207 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5208 
5209 	if ((!adev->gfx.grbm_soft_reset) &&
5210 	    (!adev->gfx.srbm_soft_reset))
5211 		return 0;
5212 
5213 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5214 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5215 
5216 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5217 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5218 		gfx_v8_0_cp_gfx_resume(adev);
5219 
5220 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5221 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5222 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5223 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5224 		int i;
5225 
5226 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5227 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5228 
5229 			mutex_lock(&adev->srbm_mutex);
5230 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5231 			gfx_v8_0_deactivate_hqd(adev, 2);
5232 			vi_srbm_select(adev, 0, 0, 0, 0);
5233 			mutex_unlock(&adev->srbm_mutex);
5234 		}
5235 		gfx_v8_0_kiq_resume(adev);
5236 	}
5237 	gfx_v8_0_rlc_start(adev);
5238 
5239 	return 0;
5240 }
5241 
5242 /**
5243  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5244  *
5245  * @adev: amdgpu_device pointer
5246  *
5247  * Fetches a GPU clock counter snapshot.
5248  * Returns the 64 bit clock counter snapshot.
5249  */
5250 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5251 {
5252 	uint64_t clock;
5253 
5254 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5255 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5256 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5257 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5258 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5259 	return clock;
5260 }
5261 
5262 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5263 					  uint32_t vmid,
5264 					  uint32_t gds_base, uint32_t gds_size,
5265 					  uint32_t gws_base, uint32_t gws_size,
5266 					  uint32_t oa_base, uint32_t oa_size)
5267 {
5268 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5269 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5270 
5271 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5272 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5273 
5274 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5275 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5276 
5277 	/* GDS Base */
5278 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5279 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5280 				WRITE_DATA_DST_SEL(0)));
5281 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5282 	amdgpu_ring_write(ring, 0);
5283 	amdgpu_ring_write(ring, gds_base);
5284 
5285 	/* GDS Size */
5286 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5287 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5288 				WRITE_DATA_DST_SEL(0)));
5289 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5290 	amdgpu_ring_write(ring, 0);
5291 	amdgpu_ring_write(ring, gds_size);
5292 
5293 	/* GWS */
5294 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5295 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5296 				WRITE_DATA_DST_SEL(0)));
5297 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5298 	amdgpu_ring_write(ring, 0);
5299 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5300 
5301 	/* OA */
5302 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5303 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5304 				WRITE_DATA_DST_SEL(0)));
5305 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5306 	amdgpu_ring_write(ring, 0);
5307 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5308 }
5309 
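/*
 * Wave state lives behind the SQ_IND_INDEX/SQ_IND_DATA indirect pair:
 * the index register selects wave, SIMD and register offset, and each
 * read of SQ_IND_DATA returns one dword.  With AUTO_INCR set the offset
 * advances on every read, which wave_read_regs() uses to stream a block
 * of SGPRs, e.g. (illustrative call):
 *
 *	uint32_t sgprs[16];
 *	wave_read_regs(adev, simd, wave, 0, SQIND_WAVE_SGPRS_OFFSET,
 *		       ARRAY_SIZE(sgprs), sgprs);
 */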
5310 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5311 {
5312 	WREG32(mmSQ_IND_INDEX,
5313 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5314 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5315 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5316 		(SQ_IND_INDEX__FORCE_READ_MASK));
5317 	return RREG32(mmSQ_IND_DATA);
5318 }
5319 
5320 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5321 			   uint32_t wave, uint32_t thread,
5322 			   uint32_t regno, uint32_t num, uint32_t *out)
5323 {
5324 	WREG32(mmSQ_IND_INDEX,
5325 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5326 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5327 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5328 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5329 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5330 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5331 	while (num--)
5332 		*(out++) = RREG32(mmSQ_IND_DATA);
5333 }
5334 
5335 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5336 {
5337 	/* type 0 wave data */
5338 	dst[(*no_fields)++] = 0;
5339 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5340 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5341 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5342 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5343 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5344 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5345 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5346 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5347 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5348 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5349 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5350 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5351 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5352 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5353 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5354 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5355 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5356 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5357 }
5358 
5359 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5360 				     uint32_t wave, uint32_t start,
5361 				     uint32_t size, uint32_t *dst)
5362 {
5363 	wave_read_regs(
5364 		adev, simd, wave, 0,
5365 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5366 }
5367 
5368 
5369 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5370 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5371 	.select_se_sh = &gfx_v8_0_select_se_sh,
5372 	.read_wave_data = &gfx_v8_0_read_wave_data,
5373 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5374 };
5375 
5376 static int gfx_v8_0_early_init(void *handle)
5377 {
5378 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5379 
5380 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5381 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5382 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5383 	gfx_v8_0_set_ring_funcs(adev);
5384 	gfx_v8_0_set_irq_funcs(adev);
5385 	gfx_v8_0_set_gds_init(adev);
5386 	gfx_v8_0_set_rlc_funcs(adev);
5387 
5388 	return 0;
5389 }
5390 
5391 static int gfx_v8_0_late_init(void *handle)
5392 {
5393 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5394 	int r;
5395 
5396 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5397 	if (r)
5398 		return r;
5399 
5400 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5401 	if (r)
5402 		return r;
5403 
5404 	/* requires IBs so do in late init after IB pool is initialized */
5405 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5406 	if (r)
5407 		return r;
5408 
5409 	amdgpu_set_powergating_state(adev,
5410 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5411 
5412 	return 0;
5413 }
5414 
5415 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5416 						       bool enable)
5417 {
5418 	if ((adev->asic_type == CHIP_POLARIS11) ||
5419 	    (adev->asic_type == CHIP_POLARIS12))
5420 		/* Send msg to SMU via Powerplay */
5421 		amdgpu_set_powergating_state(adev,
5422 					     AMD_IP_BLOCK_TYPE_SMC,
5423 					     enable ?
5424 					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5425 
5426 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5427 }
5428 
5429 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5430 							bool enable)
5431 {
5432 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5433 }
5434 
5435 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5436 		bool enable)
5437 {
5438 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5439 }
5440 
5441 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5442 					  bool enable)
5443 {
5444 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5445 }
5446 
5447 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5448 						bool enable)
5449 {
5450 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5451 
5452 	/* Read any GFX register to wake up GFX. */
5453 	if (!enable)
5454 		RREG32(mmDB_RENDER_CONTROL);
5455 }
5456 
5457 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5458 					  bool enable)
5459 {
5460 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5461 		cz_enable_gfx_cg_power_gating(adev, true);
5462 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5463 			cz_enable_gfx_pipeline_power_gating(adev, true);
5464 	} else {
5465 		cz_enable_gfx_cg_power_gating(adev, false);
5466 		cz_enable_gfx_pipeline_power_gating(adev, false);
5467 	}
5468 }
5469 
5470 static int gfx_v8_0_set_powergating_state(void *handle,
5471 					  enum amd_powergating_state state)
5472 {
5473 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5474 	bool enable = (state == AMD_PG_STATE_GATE);
5475 
5476 	if (amdgpu_sriov_vf(adev))
5477 		return 0;
5478 
5479 	switch (adev->asic_type) {
5480 	case CHIP_CARRIZO:
5481 	case CHIP_STONEY:
5482 
5483 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5484 			cz_enable_sck_slow_down_on_power_up(adev, true);
5485 			cz_enable_sck_slow_down_on_power_down(adev, true);
5486 		} else {
5487 			cz_enable_sck_slow_down_on_power_up(adev, false);
5488 			cz_enable_sck_slow_down_on_power_down(adev, false);
5489 		}
5490 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5491 			cz_enable_cp_power_gating(adev, true);
5492 		else
5493 			cz_enable_cp_power_gating(adev, false);
5494 
5495 		cz_update_gfx_cg_power_gating(adev, enable);
5496 
5497 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5498 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5499 		else
5500 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5501 
5502 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5503 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5504 		else
5505 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5506 		break;
5507 	case CHIP_POLARIS11:
5508 	case CHIP_POLARIS12:
5509 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5510 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5511 		else
5512 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5513 
5514 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5515 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5516 		else
5517 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5518 
5519 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5520 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5521 		else
5522 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5523 		break;
5524 	default:
5525 		break;
5526 	}
5527 
5528 	return 0;
5529 }
5530 
5531 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5532 {
5533 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5534 	int data;
5535 
5536 	if (amdgpu_sriov_vf(adev))
5537 		*flags = 0;
5538 
5539 	/* AMD_CG_SUPPORT_GFX_MGCG */
5540 	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5541 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5542 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5543 
5544 	/* AMD_CG_SUPPORT_GFX_CGCG */
5545 	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5546 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5547 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5548 
5549 	/* AMD_CG_SUPPORT_GFX_CGLS */
5550 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5551 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5552 
5553 	/* AMD_CG_SUPPORT_GFX_CGTS */
5554 	data = RREG32(mmCGTS_SM_CTRL_REG);
5555 	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5556 		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5557 
5558 	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5559 	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5560 		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5561 
5562 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5563 	data = RREG32(mmRLC_MEM_SLP_CNTL);
5564 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5565 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5566 
5567 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5568 	data = RREG32(mmCP_MEM_SLP_CNTL);
5569 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5570 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5571 }
5572 
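/*
 * BPM serdes commands are broadcast: select all SEs/SHs and all CU and
 * non-CU masters, then program RLC_SERDES_WR_CTRL with the command and
 * BPM register address (Stoney keeps its BPM_DATA/REG_ADDR bits, hence
 * the split mask handling below).  This is how the per-BPM gating
 * enables from the BPM register enum at the top of the file are set and
 * cleared.
 */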
5573 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5574 				     uint32_t reg_addr, uint32_t cmd)
5575 {
5576 	uint32_t data;
5577 
5578 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5579 
5580 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5581 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5582 
5583 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5584 	if (adev->asic_type == CHIP_STONEY)
5585 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5586 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5587 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5588 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5589 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5590 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5591 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5592 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5593 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5594 	else
5595 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5596 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5597 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5598 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5599 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5600 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5601 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5602 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5603 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5604 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5605 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5606 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5607 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5608 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5609 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5610 
5611 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5612 }
5613 
5614 #define MSG_ENTER_RLC_SAFE_MODE     1
5615 #define MSG_EXIT_RLC_SAFE_MODE      0
5616 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5617 #define RLC_GPR_REG2__REQ__SHIFT 0
5618 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5619 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5620 
5621 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5622 {
5623 	u32 data;
5624 	unsigned i;
5625 
5626 	data = RREG32(mmRLC_CNTL);
5627 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5628 		return;
5629 
5630 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5631 		data |= RLC_SAFE_MODE__CMD_MASK;
5632 		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5633 		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5634 		WREG32(mmRLC_SAFE_MODE, data);
5635 
5636 		for (i = 0; i < adev->usec_timeout; i++) {
5637 			if ((RREG32(mmRLC_GPM_STAT) &
5638 			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5639 			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5640 			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5641 			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5642 				break;
5643 			udelay(1);
5644 		}
5645 
5646 		for (i = 0; i < adev->usec_timeout; i++) {
5647 			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5648 				break;
5649 			udelay(1);
5650 		}
5651 		adev->gfx.rlc.in_safe_mode = true;
5652 	}
5653 }
5654 
5655 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5656 {
5657 	u32 data = 0;
5658 	unsigned i;
5659 
5660 	data = RREG32(mmRLC_CNTL);
5661 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5662 		return;
5663 
5664 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5665 		if (adev->gfx.rlc.in_safe_mode) {
5666 			data |= RLC_SAFE_MODE__CMD_MASK;
5667 			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5668 			WREG32(mmRLC_SAFE_MODE, data);
5669 			adev->gfx.rlc.in_safe_mode = false;
5670 		}
5671 	}
5672 
5673 	for (i = 0; i < adev->usec_timeout; i++) {
5674 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5675 			break;
5676 		udelay(1);
5677 	}
5678 }
5679 
5680 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5681 	.enter_safe_mode = iceland_enter_rlc_safe_mode,
5682 	.exit_safe_mode = iceland_exit_rlc_safe_mode
5683 };
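/*
 * Callers are expected to bracket CG/PG-sensitive register sequences
 * with this pair, as the functions below do:
 *
 *	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 *	... touch RLC/CGTS/CGCG registers ...
 *	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 */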
5684 
5685 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5686 						      bool enable)
5687 {
5688 	uint32_t temp, data;
5689 
5690 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5691 
5692 	/* It is disabled by HW by default */
5693 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5694 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5695 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5696 				/* 1 - RLC memory Light sleep */
5697 				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5698 
5699 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5700 				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5701 		}
5702 
5703 		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5704 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5705 		if (adev->flags & AMD_IS_APU)
5706 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5707 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5708 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5709 		else
5710 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5711 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5712 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5713 				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5714 
5715 		if (temp != data)
5716 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5717 
5718 		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5719 		gfx_v8_0_wait_for_rlc_serdes(adev);
5720 
5721 		/* 5 - clear mgcg override */
5722 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5723 
5724 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5725 			/* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5726 			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5727 			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5728 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5729 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5730 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5731 			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5732 			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5733 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5734 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5735 			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5736 			if (temp != data)
5737 				WREG32(mmCGTS_SM_CTRL_REG, data);
5738 		}
5739 		udelay(50);
5740 
5741 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5742 		gfx_v8_0_wait_for_rlc_serdes(adev);
5743 	} else {
5744 		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5745 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5746 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5747 				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5748 				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5749 				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5750 		if (temp != data)
5751 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5752 
5753 		/* 2 - disable MGLS in RLC */
5754 		data = RREG32(mmRLC_MEM_SLP_CNTL);
5755 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5756 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5757 			WREG32(mmRLC_MEM_SLP_CNTL, data);
5758 		}
5759 
5760 		/* 3 - disable MGLS in CP */
5761 		data = RREG32(mmCP_MEM_SLP_CNTL);
5762 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5763 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5764 			WREG32(mmCP_MEM_SLP_CNTL, data);
5765 		}
5766 
5767 		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5768 		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5769 		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5770 				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5771 		if (temp != data)
5772 			WREG32(mmCGTS_SM_CTRL_REG, data);
5773 
5774 		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5775 		gfx_v8_0_wait_for_rlc_serdes(adev);
5776 
5777 		/* 6 - set mgcg override */
5778 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5779 
5780 		udelay(50);
5781 
5782 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5783 		gfx_v8_0_wait_for_rlc_serdes(adev);
5784 	}
5785 
5786 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5787 }
5788 
5789 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5790 						      bool enable)
5791 {
5792 	uint32_t temp, temp1, data, data1;
5793 
5794 	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5795 
5796 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5797 
5798 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5799 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5800 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5801 		if (temp1 != data1)
5802 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5803 
5804 		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5805 		gfx_v8_0_wait_for_rlc_serdes(adev);
5806 
5807 		/* 2 - clear cgcg override */
5808 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5809 
5810 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5811 		gfx_v8_0_wait_for_rlc_serdes(adev);
5812 
5813 		/* 3 - write cmd to set CGLS */
5814 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5815 
5816 		/* 4 - enable cgcg */
5817 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5818 
5819 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5820 			/* enable cgls*/
5821 			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5822 
5823 			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5824 			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5825 
5826 			if (temp1 != data1)
5827 				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5828 		} else {
5829 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5830 		}
5831 
5832 		if (temp != data)
5833 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5834 
5835 		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5836 		 * Cmp_busy/GFX_Idle interrupts
5837 		 */
5838 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5839 	} else {
5840 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5841 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5842 
5843 		/* set the CGCG/CGLS overrides (forces coarse grain gating off) */
5844 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5845 		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5846 				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5847 		if (temp1 != data1)
5848 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5849 
5850 		/* read gfx register to wake up cgcg */
5851 		RREG32(mmCB_CGTT_SCLK_CTRL);
5852 		RREG32(mmCB_CGTT_SCLK_CTRL);
5853 		RREG32(mmCB_CGTT_SCLK_CTRL);
5854 		RREG32(mmCB_CGTT_SCLK_CTRL);
5855 
5856 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5857 		gfx_v8_0_wait_for_rlc_serdes(adev);
5858 
5859 		/* write cmd to Set CGCG Override */
5860 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5861 
5862 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5863 		gfx_v8_0_wait_for_rlc_serdes(adev);
5864 
5865 		/* write cmd to Clear CGLS */
5866 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5867 
5868 		/* disable cgcg, cgls should be disabled too. */
5869 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5870 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5871 		if (temp != data)
5872 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5873 		/* enable interrupts again for PG */
5874 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5875 	}
5876 
5877 	gfx_v8_0_wait_for_rlc_serdes(adev);
5878 
5879 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5880 }

5881 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5882 					    bool enable)
5883 {
5884 	if (enable) {
5885 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5886 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5887 		 */
5888 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5889 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5890 	} else {
5891 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5892 		 * ===  CGCG + CGLS ===
5893 		 */
5894 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5895 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5896 	}
5897 	return 0;
5898 }
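/*
 * Illustrative caller (see gfx_v8_0_set_clockgating_state() below):
 * the enable flag is simply derived from the requested gating state,
 *
 *	gfx_v8_0_update_gfx_clock_gating(adev,
 *					 state == AMD_CG_STATE_GATE);
 */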
5899 
5900 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5901 					  enum amd_clockgating_state state)
5902 {
5903 	uint32_t msg_id, pp_state = 0;
5904 	uint32_t pp_support_state = 0;
5905 	void *pp_handle = adev->powerplay.pp_handle;
5906 
5907 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5908 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5909 			pp_support_state = PP_STATE_SUPPORT_LS;
5910 			pp_state = PP_STATE_LS;
5911 		}
5912 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5913 			pp_support_state |= PP_STATE_SUPPORT_CG;
5914 			pp_state |= PP_STATE_CG;
5915 		}
5916 		if (state == AMD_CG_STATE_UNGATE)
5917 			pp_state = 0;
5918 
5919 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5920 				PP_BLOCK_GFX_CG,
5921 				pp_support_state,
5922 				pp_state);
5923 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5924 	}
5925 
5926 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5927 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5928 			pp_support_state = PP_STATE_SUPPORT_LS;
5929 			pp_state = PP_STATE_LS;
5930 		}
5931 
5932 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5933 			pp_support_state |= PP_STATE_SUPPORT_CG;
5934 			pp_state |= PP_STATE_CG;
5935 		}
5936 
5937 		if (state == AMD_CG_STATE_UNGATE)
5938 			pp_state = 0;
5939 
5940 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5941 				PP_BLOCK_GFX_MG,
5942 				pp_support_state,
5943 				pp_state);
5944 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5945 	}
5946 
5947 	return 0;
5948 }
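/*
 * Each message above packs the target group and block together with
 * the supported and requested gating levels into a single dword for
 * the SMU.  For example, requesting CG plus LS on the GFX CG block
 * (the exact field layout lives in the PP_CG_MSG_ID() macro and is
 * not repeated here):
 *
 *	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, PP_BLOCK_GFX_CG,
 *			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
 *			      PP_STATE_CG | PP_STATE_LS);
 */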
5949 
5950 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5951 					  enum amd_clockgating_state state)
5952 {
5954 	uint32_t msg_id, pp_state = 0;
5955 	uint32_t pp_support_state = 0;
5956 	void *pp_handle = adev->powerplay.pp_handle;
5957 
5958 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5959 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5960 			pp_support_state = PP_STATE_SUPPORT_LS;
5961 			pp_state = PP_STATE_LS;
5962 		}
5963 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5964 			pp_support_state |= PP_STATE_SUPPORT_CG;
5965 			pp_state |= PP_STATE_CG;
5966 		}
5967 		if (state == AMD_CG_STATE_UNGATE)
5968 			pp_state = 0;
5969 
5970 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5971 				PP_BLOCK_GFX_CG,
5972 				pp_support_state,
5973 				pp_state);
5974 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5975 	}
5976 
5977 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5978 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5979 			pp_support_state = PP_STATE_SUPPORT_LS;
5980 			pp_state = PP_STATE_LS;
5981 		}
5982 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5983 			pp_support_state |= PP_STATE_SUPPORT_CG;
5984 			pp_state |= PP_STATE_CG;
5985 		}
5986 		if (state == AMD_CG_STATE_UNGATE)
5987 			pp_state = 0;
5988 
5989 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5990 				PP_BLOCK_GFX_3D,
5991 				pp_support_state,
5992 				pp_state);
5993 		amd_set_clockgating_by_smu(pp_handle, msg_id);
5994 	}
5995 
5996 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5997 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5998 			pp_support_state = PP_STATE_SUPPORT_LS;
5999 			pp_state = PP_STATE_LS;
6000 		}
6001 
6002 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6003 			pp_support_state |= PP_STATE_SUPPORT_CG;
6004 			pp_state |= PP_STATE_CG;
6005 		}
6006 
6007 		if (state == AMD_CG_STATE_UNGATE)
6008 			pp_state = 0;
6009 
6010 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6011 				PP_BLOCK_GFX_MG,
6012 				pp_support_state,
6013 				pp_state);
6014 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6015 	}
6016 
6017 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6018 		pp_support_state = PP_STATE_SUPPORT_LS;
6019 
6020 		if (state == AMD_CG_STATE_UNGATE)
6021 			pp_state = 0;
6022 		else
6023 			pp_state = PP_STATE_LS;
6024 
6025 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6026 				PP_BLOCK_GFX_RLC,
6027 				pp_support_state,
6028 				pp_state);
6029 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6030 	}
6031 
6032 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6033 		pp_support_state = PP_STATE_SUPPORT_LS;
6034 
6035 		if (state == AMD_CG_STATE_UNGATE)
6036 			pp_state = 0;
6037 		else
6038 			pp_state = PP_STATE_LS;
6039 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6040 			PP_BLOCK_GFX_CP,
6041 			pp_support_state,
6042 			pp_state);
6043 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6044 	}
6045 
6046 	return 0;
6047 }
6048 
6049 static int gfx_v8_0_set_clockgating_state(void *handle,
6050 					  enum amd_clockgating_state state)
6051 {
6052 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6053 
6054 	if (amdgpu_sriov_vf(adev))
6055 		return 0;
6056 
6057 	switch (adev->asic_type) {
6058 	case CHIP_FIJI:
6059 	case CHIP_CARRIZO:
6060 	case CHIP_STONEY:
6061 		gfx_v8_0_update_gfx_clock_gating(adev,
6062 						 state == AMD_CG_STATE_GATE);
6063 		break;
6064 	case CHIP_TONGA:
6065 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6066 		break;
6067 	case CHIP_POLARIS10:
6068 	case CHIP_POLARIS11:
6069 	case CHIP_POLARIS12:
6070 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6071 		break;
6072 	default:
6073 		break;
6074 	}
6075 	return 0;
6076 }
6077 
6078 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6079 {
6080 	return ring->adev->wb.wb[ring->rptr_offs];
6081 }
6082 
6083 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6084 {
6085 	struct amdgpu_device *adev = ring->adev;
6086 
6087 	if (ring->use_doorbell)
6088 		/* XXX check if swapping is necessary on BE */
6089 		return ring->adev->wb.wb[ring->wptr_offs];
6090 	else
6091 		return RREG32(mmCP_RB0_WPTR);
6092 }
6093 
6094 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6095 {
6096 	struct amdgpu_device *adev = ring->adev;
6097 
6098 	if (ring->use_doorbell) {
6099 		/* XXX check if swapping is necessary on BE */
6100 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6101 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6102 	} else {
6103 		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
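		/* read back to flush the posted register write */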
6104 		(void)RREG32(mmCP_RB0_WPTR);
6105 	}
6106 }
6107 
6108 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6109 {
6110 	u32 ref_and_mask, reg_mem_engine;
6111 
6112 	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6113 	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6114 		switch (ring->me) {
6115 		case 1:
6116 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6117 			break;
6118 		case 2:
6119 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6120 			break;
6121 		default:
6122 			return;
6123 		}
6124 		reg_mem_engine = 0;
6125 	} else {
6126 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6127 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6128 	}
6129 
6130 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6131 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6132 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6133 				 reg_mem_engine));
6134 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6135 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6136 	amdgpu_ring_write(ring, ref_and_mask);
6137 	amdgpu_ring_write(ring, ref_and_mask);
6138 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6139 }
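/*
 * WAIT_REG_MEM payload, in emission order: control (operation, compare
 * function, engine), poll address 1 (GPU_HDP_FLUSH_REQ), poll address 2
 * (GPU_HDP_FLUSH_DONE), reference value, mask, poll interval.  With
 * operation 1 (the "write, wait, write" mode) the CP writes
 * ref_and_mask into the request register, kicking the HDP flush, and
 * then polls the done register until it matches under the same mask.
 */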
6140 
6141 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6142 {
6143 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6144 	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6145 		EVENT_INDEX(4));
6146 
6147 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6148 	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6149 		EVENT_INDEX(0));
6150 }
6151 
6153 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6154 {
6155 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6156 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6157 				 WRITE_DATA_DST_SEL(0) |
6158 				 WR_CONFIRM));
6159 	amdgpu_ring_write(ring, mmHDP_DEBUG0);
6160 	amdgpu_ring_write(ring, 0);
6161 	amdgpu_ring_write(ring, 1);
6163 }
6164 
6165 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6166 				      struct amdgpu_ib *ib,
6167 				      unsigned vm_id, bool ctx_switch)
6168 {
6169 	u32 header, control = 0;
6170 
6171 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6172 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6173 	else
6174 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6175 
6176 	control |= ib->length_dw | (vm_id << 24);
6177 
6178 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6179 		control |= INDIRECT_BUFFER_PRE_ENB(1);
6180 
6181 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6182 			gfx_v8_0_ring_emit_de_meta(ring);
6183 	}
6184 
6185 	amdgpu_ring_write(ring, header);
6186 	amdgpu_ring_write(ring,
6187 #ifdef __BIG_ENDIAN
6188 			  (2 << 0) |
6189 #endif
6190 			  (ib->gpu_addr & 0xFFFFFFFC));
6191 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6192 	amdgpu_ring_write(ring, control);
6193 }
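/*
 * Control word layout used above: the low bits carry the IB length in
 * dwords and bits 24+ the VMID to execute under.  INDIRECT_BUFFER_PRE_ENB
 * is set only in the SR-IOV preemption case, paired with the DE metadata
 * emitted just before the IB itself.
 */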
6194 
6195 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6196 					  struct amdgpu_ib *ib,
6197 					  unsigned vm_id, bool ctx_switch)
6198 {
6199 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6200 
6201 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6202 	amdgpu_ring_write(ring,
6203 #ifdef __BIG_ENDIAN
6204 				(2 << 0) |
6205 #endif
6206 				(ib->gpu_addr & 0xFFFFFFFC));
6207 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6208 	amdgpu_ring_write(ring, control);
6209 }
6210 
6211 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6212 					 u64 seq, unsigned flags)
6213 {
6214 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6215 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6216 
6217 	/* EVENT_WRITE_EOP - flush caches, send int */
6218 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6219 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6220 				 EOP_TC_ACTION_EN |
6221 				 EOP_TC_WB_ACTION_EN |
6222 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6223 				 EVENT_INDEX(5)));
6224 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6225 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6226 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6227 	amdgpu_ring_write(ring, lower_32_bits(seq));
6228 	amdgpu_ring_write(ring, upper_32_bits(seq));
6230 }
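/*
 * In the EOP packet above, DATA_SEL picks what the CP writes back once
 * the event retires (1 = lower 32 bits of seq, 2 = the full 64-bit seq)
 * and INT_SEL(2) raises an interrupt after the write confirms; both are
 * selected from the AMDGPU_FENCE_FLAG_* bits passed in by the caller.
 */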
6231 
6232 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6233 {
6234 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6235 	uint32_t seq = ring->fence_drv.sync_seq;
6236 	uint64_t addr = ring->fence_drv.gpu_addr;
6237 
6238 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6239 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6240 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6241 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6242 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6243 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6244 	amdgpu_ring_write(ring, seq);
6245 	amdgpu_ring_write(ring, 0xffffffff);
6246 	amdgpu_ring_write(ring, 4); /* poll interval */
6247 }
6248 
6249 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6250 					unsigned vm_id, uint64_t pd_addr)
6251 {
6252 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6253 
6254 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6255 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6256 				 WRITE_DATA_DST_SEL(0) |
6257 				 WR_CONFIRM));
6258 	if (vm_id < 8) {
6259 		amdgpu_ring_write(ring,
6260 				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6261 	} else {
6262 		amdgpu_ring_write(ring,
6263 				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6264 	}
6265 	amdgpu_ring_write(ring, 0);
6266 	amdgpu_ring_write(ring, pd_addr >> 12);
6267 
6268 	/* bits 0-15 are the VM contexts0-15 */
6269 	/* invalidate the cache */
6270 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6271 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6272 				 WRITE_DATA_DST_SEL(0)));
6273 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6274 	amdgpu_ring_write(ring, 0);
6275 	amdgpu_ring_write(ring, 1 << vm_id);
6276 
6277 	/* wait for the invalidate to complete */
6278 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6279 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6280 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6281 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6282 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6283 	amdgpu_ring_write(ring, 0);
6284 	amdgpu_ring_write(ring, 0); /* ref */
6285 	amdgpu_ring_write(ring, 0); /* mask */
6286 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6287 
6288 	/* compute doesn't have PFP */
6289 	if (usepfp) {
6290 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6291 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6292 		amdgpu_ring_write(ring, 0x0);
6293 	}
6294 }
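/*
 * Summary of the flush above: (1) WRITE_DATA reloads the VMID's page
 * table base (VMIDs 0-7 and 8-15 sit in two separate register banks),
 * (2) WRITE_DATA sets the VMID's bit in VM_INVALIDATE_REQUEST to drop
 * stale TLB entries, (3) WAIT_REG_MEM with compare function "always"
 * reads the register back, ordering the invalidate ahead of whatever
 * follows, and (4) on gfx rings PFP_SYNC_ME keeps the prefetcher from
 * racing ahead with pre-flush translations.
 */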
6295 
6296 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6297 {
6298 	return ring->adev->wb.wb[ring->wptr_offs];
6299 }
6300 
6301 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6302 {
6303 	struct amdgpu_device *adev = ring->adev;
6304 
6305 	/* XXX check if swapping is necessary on BE */
6306 	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6307 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6308 }
6309 
6310 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6311 					     u64 addr, u64 seq,
6312 					     unsigned flags)
6313 {
6314 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6315 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6316 
6317 	/* RELEASE_MEM - flush caches, send int */
6318 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6319 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6320 				 EOP_TC_ACTION_EN |
6321 				 EOP_TC_WB_ACTION_EN |
6322 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6323 				 EVENT_INDEX(5)));
6324 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6325 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6326 	amdgpu_ring_write(ring, upper_32_bits(addr));
6327 	amdgpu_ring_write(ring, lower_32_bits(seq));
6328 	amdgpu_ring_write(ring, upper_32_bits(seq));
6329 }
6330 
6331 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6332 					 u64 seq, unsigned int flags)
6333 {
6334 	/* we only allocate 32bit for each seq wb address */
6335 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6336 
6337 	/* write fence seq to the "addr" */
6338 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6339 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6340 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6341 	amdgpu_ring_write(ring, lower_32_bits(addr));
6342 	amdgpu_ring_write(ring, upper_32_bits(addr));
6343 	amdgpu_ring_write(ring, lower_32_bits(seq));
6344 
6345 	if (flags & AMDGPU_FENCE_FLAG_INT) {
6346 		/* set register to trigger INT */
6347 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6348 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6349 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6350 		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6351 		amdgpu_ring_write(ring, 0);
6352 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6353 	}
6354 }
6355 
6356 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6357 {
6358 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6359 	amdgpu_ring_write(ring, 0);
6360 }
6361 
6362 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6363 {
6364 	uint32_t dw2 = 0;
6365 
6366 	if (amdgpu_sriov_vf(ring->adev))
6367 		gfx_v8_0_ring_emit_ce_meta(ring);
6368 
6369 	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
6370 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6371 		gfx_v8_0_ring_emit_vgt_flush(ring);
6372 		/* set load_global_config & load_global_uconfig */
6373 		dw2 |= 0x8001;
6374 		/* set load_cs_sh_regs */
6375 		dw2 |= 0x01000000;
6376 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6377 		dw2 |= 0x10002;
6378 
6379 		/* set load_ce_ram if preamble presented */
6380 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6381 			dw2 |= 0x10000000;
6382 	} else {
6383 		/* still load_ce_ram if this is the first time the preamble is
6384 		 * presented, even though no context switch happens.
6385 		 */
6386 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6387 			dw2 |= 0x10000000;
6388 	}
6389 
6390 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6391 	amdgpu_ring_write(ring, dw2);
6392 	amdgpu_ring_write(ring, 0);
6393 }
6394 
6395 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6396 {
6397 	unsigned ret;
6398 
6399 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6400 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6401 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6402 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6403 	ret = ring->wptr & ring->buf_mask;
6404 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6405 	return ret;
6406 }
6407 
6408 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6409 {
6410 	unsigned cur;
6411 
6412 	BUG_ON(offset > ring->buf_mask);
6413 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6414 
6415 	cur = (ring->wptr & ring->buf_mask) - 1;
6416 	if (likely(cur > offset))
6417 		ring->ring[offset] = cur - offset;
6418 	else
6419 		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6420 }
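/*
 * The patched dword is the number of dwords COND_EXEC may skip.  When
 * the write pointer has wrapped past the patch location, the distance
 * is taken modulo the ring size; e.g. with a 1024-dword ring, offset
 * 1020 and cur 4, the stored count is 1024 - 1020 + 4 = 8.
 */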
6421 
6422 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6423 {
6424 	struct amdgpu_device *adev = ring->adev;
6425 
6426 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6427 	amdgpu_ring_write(ring, 0 |	/* src: register*/
6428 				(5 << 8) |	/* dst: memory */
6429 				(1 << 20));	/* write confirm */
6430 	amdgpu_ring_write(ring, reg);
6431 	amdgpu_ring_write(ring, 0);
6432 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6433 				adev->virt.reg_val_offs * 4));
6434 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6435 				adev->virt.reg_val_offs * 4));
6436 }
6437 
6438 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6439 				  uint32_t val)
6440 {
6441 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6442 	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6443 	amdgpu_ring_write(ring, reg);
6444 	amdgpu_ring_write(ring, 0);
6445 	amdgpu_ring_write(ring, val);
6446 }
6447 
6448 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6449 						 enum amdgpu_interrupt_state state)
6450 {
6451 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6452 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6453 }
6454 
6455 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6456 						     int me, int pipe,
6457 						     enum amdgpu_interrupt_state state)
6458 {
6459 	u32 mec_int_cntl, mec_int_cntl_reg;
6460 
6461 	/*
6462 	 * amdgpu controls only the first MEC. That's why this function only
6463 	 * handles the setting of interrupts for this specific MEC. All other
6464 	 * pipes' interrupts are set by amdkfd.
6465 	 */
6466 
6467 	if (me == 1) {
6468 		switch (pipe) {
6469 		case 0:
6470 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6471 			break;
6472 		case 1:
6473 			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6474 			break;
6475 		case 2:
6476 			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6477 			break;
6478 		case 3:
6479 			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6480 			break;
6481 		default:
6482 			DRM_DEBUG("invalid pipe %d\n", pipe);
6483 			return;
6484 		}
6485 	} else {
6486 		DRM_DEBUG("invalid me %d\n", me);
6487 		return;
6488 	}
6489 
6490 	switch (state) {
6491 	case AMDGPU_IRQ_STATE_DISABLE:
6492 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6493 		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6494 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6495 		break;
6496 	case AMDGPU_IRQ_STATE_ENABLE:
6497 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6498 		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6499 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6500 		break;
6501 	default:
6502 		break;
6503 	}
6504 }
6505 
6506 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6507 					     struct amdgpu_irq_src *source,
6508 					     unsigned type,
6509 					     enum amdgpu_interrupt_state state)
6510 {
6511 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6512 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6513 
6514 	return 0;
6515 }
6516 
6517 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6518 					      struct amdgpu_irq_src *source,
6519 					      unsigned type,
6520 					      enum amdgpu_interrupt_state state)
6521 {
6522 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6523 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6524 
6525 	return 0;
6526 }
6527 
6528 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6529 					    struct amdgpu_irq_src *src,
6530 					    unsigned type,
6531 					    enum amdgpu_interrupt_state state)
6532 {
6533 	switch (type) {
6534 	case AMDGPU_CP_IRQ_GFX_EOP:
6535 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6536 		break;
6537 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6538 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6539 		break;
6540 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6541 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6542 		break;
6543 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6544 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6545 		break;
6546 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6547 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6548 		break;
6549 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6550 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6551 		break;
6552 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6553 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6554 		break;
6555 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6556 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6557 		break;
6558 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6559 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6560 		break;
6561 	default:
6562 		break;
6563 	}
6564 	return 0;
6565 }
6566 
6567 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6568 			    struct amdgpu_irq_src *source,
6569 			    struct amdgpu_iv_entry *entry)
6570 {
6571 	int i;
6572 	u8 me_id, pipe_id, queue_id;
6573 	struct amdgpu_ring *ring;
6574 
6575 	DRM_DEBUG("IH: CP EOP\n");
6576 	me_id = (entry->ring_id & 0x0c) >> 2;
6577 	pipe_id = (entry->ring_id & 0x03) >> 0;
6578 	queue_id = (entry->ring_id & 0x70) >> 4;
6579 
6580 	switch (me_id) {
6581 	case 0:
6582 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6583 		break;
6584 	case 1:
6585 	case 2:
6586 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6587 			ring = &adev->gfx.compute_ring[i];
6588 			/* Per-queue interrupt is supported for MEC starting from VI.
6589 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6590 			 */
6591 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6592 				amdgpu_fence_process(ring);
6593 		}
6594 		break;
6595 	}
6596 	return 0;
6597 }
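/*
 * ring_id decode example: an entry with ring_id == 0x15 yields
 * me_id = (0x15 & 0x0c) >> 2 = 1, pipe_id = 0x15 & 0x03 = 1 and
 * queue_id = (0x15 & 0x70) >> 4 = 1, i.e. MEC1, pipe 1, queue 1.
 */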
6598 
6599 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6600 				 struct amdgpu_irq_src *source,
6601 				 struct amdgpu_iv_entry *entry)
6602 {
6603 	DRM_ERROR("Illegal register access in command stream\n");
6604 	schedule_work(&adev->reset_work);
6605 	return 0;
6606 }
6607 
6608 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6609 				  struct amdgpu_irq_src *source,
6610 				  struct amdgpu_iv_entry *entry)
6611 {
6612 	DRM_ERROR("Illegal instruction in command stream\n");
6613 	schedule_work(&adev->reset_work);
6614 	return 0;
6615 }
6616 
6617 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6618 					    struct amdgpu_irq_src *src,
6619 					    unsigned int type,
6620 					    enum amdgpu_interrupt_state state)
6621 {
6622 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6623 
6624 	switch (type) {
6625 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6626 		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6627 			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6628 		if (ring->me == 1)
6629 			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6630 				     ring->pipe,
6631 				     GENERIC2_INT_ENABLE,
6632 				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6633 		else
6634 			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6635 				     ring->pipe,
6636 				     GENERIC2_INT_ENABLE,
6637 				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6638 		break;
6639 	default:
6640 		BUG(); /* kiq only support GENERIC2_INT now */
6641 		break;
6642 	}
6643 	return 0;
6644 }
6645 
6646 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6647 			    struct amdgpu_irq_src *source,
6648 			    struct amdgpu_iv_entry *entry)
6649 {
6650 	u8 me_id, pipe_id, queue_id;
6651 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6652 
6653 	me_id = (entry->ring_id & 0x0c) >> 2;
6654 	pipe_id = (entry->ring_id & 0x03) >> 0;
6655 	queue_id = (entry->ring_id & 0x70) >> 4;
6656 	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6657 		   me_id, pipe_id, queue_id);
6658 
6659 	amdgpu_fence_process(ring);
6660 	return 0;
6661 }
6662 
6663 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6664 	.name = "gfx_v8_0",
6665 	.early_init = gfx_v8_0_early_init,
6666 	.late_init = gfx_v8_0_late_init,
6667 	.sw_init = gfx_v8_0_sw_init,
6668 	.sw_fini = gfx_v8_0_sw_fini,
6669 	.hw_init = gfx_v8_0_hw_init,
6670 	.hw_fini = gfx_v8_0_hw_fini,
6671 	.suspend = gfx_v8_0_suspend,
6672 	.resume = gfx_v8_0_resume,
6673 	.is_idle = gfx_v8_0_is_idle,
6674 	.wait_for_idle = gfx_v8_0_wait_for_idle,
6675 	.check_soft_reset = gfx_v8_0_check_soft_reset,
6676 	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6677 	.soft_reset = gfx_v8_0_soft_reset,
6678 	.post_soft_reset = gfx_v8_0_post_soft_reset,
6679 	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6680 	.set_powergating_state = gfx_v8_0_set_powergating_state,
6681 	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6682 };
6683 
6684 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6685 	.type = AMDGPU_RING_TYPE_GFX,
6686 	.align_mask = 0xff,
6687 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6688 	.support_64bit_ptrs = false,
6689 	.get_rptr = gfx_v8_0_ring_get_rptr,
6690 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6691 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6692 	.emit_frame_size = /* maximum 215 dw if we count 16 IBs in */
6693 		5 +  /* COND_EXEC */
6694 		7 +  /* PIPELINE_SYNC */
6695 		19 + /* VM_FLUSH */
6696 		8 +  /* FENCE for VM_FLUSH */
6697 		20 + /* GDS switch */
6698 		4 + /* double SWITCH_BUFFER,
6699 		       the first COND_EXEC jumps to the place just
6700 		       prior to this double SWITCH_BUFFER */
6701 		5 + /* COND_EXEC */
6702 		7 + /* HDP_flush */
6703 		4 + /* VGT_flush */
6704 		14 + /* CE_META */
6705 		31 + /* DE_META */
6706 		3 + /* CNTX_CTRL */
6707 		5 + /* HDP_INVL */
6708 		8 + 8 + /* FENCE x2 */
6709 		2, /* SWITCH_BUFFER */
6710 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6711 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6712 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6713 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6714 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6715 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6716 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6717 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6718 	.test_ring = gfx_v8_0_ring_test_ring,
6719 	.test_ib = gfx_v8_0_ring_test_ib,
6720 	.insert_nop = amdgpu_ring_insert_nop,
6721 	.pad_ib = amdgpu_ring_generic_pad_ib,
6722 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6723 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6724 	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6725 	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6726 };
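/*
 * Note that emit_frame_size above only budgets the fixed per-frame
 * packets (150 dw in total); IB emission is charged separately through
 * emit_ib_size, so a frame carrying 16 IBs comes to roughly
 * 150 + 16 * 4 = 214 dw, matching the ~215 dw figure in the comment.
 */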
6727 
6728 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6729 	.type = AMDGPU_RING_TYPE_COMPUTE,
6730 	.align_mask = 0xff,
6731 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6732 	.support_64bit_ptrs = false,
6733 	.get_rptr = gfx_v8_0_ring_get_rptr,
6734 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6735 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6736 	.emit_frame_size =
6737 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6738 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6739 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6740 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6741 		17 + /* gfx_v8_0_ring_emit_vm_flush */
6742 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6743 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
6744 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6745 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6746 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6747 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6748 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6749 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6750 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6751 	.test_ring = gfx_v8_0_ring_test_ring,
6752 	.test_ib = gfx_v8_0_ring_test_ib,
6753 	.insert_nop = amdgpu_ring_insert_nop,
6754 	.pad_ib = amdgpu_ring_generic_pad_ib,
6755 };
6756 
6757 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6758 	.type = AMDGPU_RING_TYPE_KIQ,
6759 	.align_mask = 0xff,
6760 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6761 	.support_64bit_ptrs = false,
6762 	.get_rptr = gfx_v8_0_ring_get_rptr,
6763 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6764 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6765 	.emit_frame_size =
6766 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6767 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6768 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6769 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6770 		17 + /* gfx_v8_0_ring_emit_vm_flush */
6771 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6772 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
6773 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6774 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6775 	.test_ring = gfx_v8_0_ring_test_ring,
6776 	.test_ib = gfx_v8_0_ring_test_ib,
6777 	.insert_nop = amdgpu_ring_insert_nop,
6778 	.pad_ib = amdgpu_ring_generic_pad_ib,
6779 	.emit_rreg = gfx_v8_0_ring_emit_rreg,
6780 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6781 };
6782 
6783 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6784 {
6785 	int i;
6786 
6787 	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6788 
6789 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6790 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6791 
6792 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6793 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6794 }
6795 
6796 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6797 	.set = gfx_v8_0_set_eop_interrupt_state,
6798 	.process = gfx_v8_0_eop_irq,
6799 };
6800 
6801 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6802 	.set = gfx_v8_0_set_priv_reg_fault_state,
6803 	.process = gfx_v8_0_priv_reg_irq,
6804 };
6805 
6806 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6807 	.set = gfx_v8_0_set_priv_inst_fault_state,
6808 	.process = gfx_v8_0_priv_inst_irq,
6809 };
6810 
6811 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
6812 	.set = gfx_v8_0_kiq_set_interrupt_state,
6813 	.process = gfx_v8_0_kiq_irq,
6814 };
6815 
6816 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6817 {
6818 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6819 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6820 
6821 	adev->gfx.priv_reg_irq.num_types = 1;
6822 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6823 
6824 	adev->gfx.priv_inst_irq.num_types = 1;
6825 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6826 
6827 	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
6828 	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
6829 }
6830 
6831 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6832 {
6833 	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6834 }
6835 
6836 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6837 {
6838 	/* init asic gds info */
6839 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6840 	adev->gds.gws.total_size = 64;
6841 	adev->gds.oa.total_size = 16;
6842 
6843 	if (adev->gds.mem.total_size == 64 * 1024) {
6844 		adev->gds.mem.gfx_partition_size = 4096;
6845 		adev->gds.mem.cs_partition_size = 4096;
6846 
6847 		adev->gds.gws.gfx_partition_size = 4;
6848 		adev->gds.gws.cs_partition_size = 4;
6849 
6850 		adev->gds.oa.gfx_partition_size = 4;
6851 		adev->gds.oa.cs_partition_size = 1;
6852 	} else {
6853 		adev->gds.mem.gfx_partition_size = 1024;
6854 		adev->gds.mem.cs_partition_size = 1024;
6855 
6856 		adev->gds.gws.gfx_partition_size = 16;
6857 		adev->gds.gws.cs_partition_size = 16;
6858 
6859 		adev->gds.oa.gfx_partition_size = 4;
6860 		adev->gds.oa.cs_partition_size = 4;
6861 	}
6862 }
6863 
6864 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6865 						 u32 bitmap)
6866 {
6867 	u32 data;
6868 
6869 	if (!bitmap)
6870 		return;
6871 
6872 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6873 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6874 
6875 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6876 }
6877 
6878 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6879 {
6880 	u32 data, mask;
6881 
6882 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6883 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6884 
6885 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6886 
6887 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6888 }
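/*
 * Worked example: with max_cu_per_sh = 8 the mask is 0xff; if the
 * combined fused-off/user-disabled field reads back as 0x03, the
 * function returns ~0x03 & 0xff = 0xfc, i.e. CUs 2-7 are active in
 * the currently selected SE/SH.
 */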
6889 
6890 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6891 {
6892 	int i, j, k, counter, active_cu_number = 0;
6893 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6894 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6895 	unsigned disable_masks[4 * 2];
6896 	u32 ao_cu_num;
6897 
6898 	memset(cu_info, 0, sizeof(*cu_info));
6899 
6900 	if (adev->flags & AMD_IS_APU)
6901 		ao_cu_num = 2;
6902 	else
6903 		ao_cu_num = adev->gfx.config.max_cu_per_sh;
6904 
6905 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6906 
6907 	mutex_lock(&adev->grbm_idx_mutex);
6908 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6909 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6910 			mask = 1;
6911 			ao_bitmap = 0;
6912 			counter = 0;
6913 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6914 			if (i < 4 && j < 2)
6915 				gfx_v8_0_set_user_cu_inactive_bitmap(
6916 					adev, disable_masks[i * 2 + j]);
6917 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6918 			cu_info->bitmap[i][j] = bitmap;
6919 
6920 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6921 				if (bitmap & mask) {
6922 					if (counter < ao_cu_num)
6923 						ao_bitmap |= mask;
6924 					counter++;
6925 				}
6925 				}
6926 				mask <<= 1;
6927 			}
6928 			active_cu_number += counter;
6929 			if (i < 2 && j < 2)
6930 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6931 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
6932 		}
6933 	}
6934 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6935 	mutex_unlock(&adev->grbm_idx_mutex);
6936 
6937 	cu_info->number = active_cu_number;
6938 	cu_info->ao_cu_mask = ao_cu_mask;
6939 }
6940 
6941 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
6942 {
6943 	.type = AMD_IP_BLOCK_TYPE_GFX,
6944 	.major = 8,
6945 	.minor = 0,
6946 	.rev = 0,
6947 	.funcs = &gfx_v8_0_ip_funcs,
6948 };
6949 
6950 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
6951 {
6952 	.type = AMD_IP_BLOCK_TYPE_GFX,
6953 	.major = 8,
6954 	.minor = 1,
6955 	.rev = 0,
6956 	.funcs = &gfx_v8_0_ip_funcs,
6957 };
6958 
6959 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
6960 {
6961 	uint64_t ce_payload_addr;
6962 	int cnt_ce;
6963 	static union {
6964 		struct vi_ce_ib_state regular;
6965 		struct vi_ce_ib_state_chained_ib chained;
6966 	} ce_payload = {};
6967 
6968 	if (ring->adev->virt.chained_ib_support) {
6969 		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6970 						  offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
6971 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
6972 	} else {
6973 		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6974 						  offsetof(struct vi_gfx_meta_data, ce_payload);
6975 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
6976 	}
6977 
6978 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
6979 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
6980 				WRITE_DATA_DST_SEL(8) |
6981 				WR_CONFIRM) |
6982 				WRITE_DATA_CACHE_POLICY(0));
6983 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
6984 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
6985 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
6986 }
6987 
6988 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
6989 {
6990 	uint64_t de_payload_addr, gds_addr, csa_addr;
6991 	int cnt_de;
6992 	static union {
6993 		struct vi_de_ib_state regular;
6994 		struct vi_de_ib_state_chained_ib chained;
6995 	} de_payload = {};
6996 
6997 	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
6998 	gds_addr = csa_addr + 4096;
6999 	if (ring->adev->virt.chained_ib_support) {
7000 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7001 		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7002 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7003 		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7004 	} else {
7005 		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7006 		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7007 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7008 		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7009 	}
7010 
7011 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7012 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7013 				WRITE_DATA_DST_SEL(8) |
7014 				WR_CONFIRM) |
7015 				WRITE_DATA_CACHE_POLICY(0));
7016 	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7017 	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7018 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7019 }
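/*
 * CSA layout assumed by the two helpers above: the client state area
 * sits at AMDGPU_VA_RESERVED_SIZE - 2 * 4096 within the reserved VA
 * range and holds the vi_gfx_meta_data (CE and DE payloads), while the
 * following 4 KiB page serves as the GDS backup that the DE payload
 * points at.
 */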
7020