/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
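/*
 * Each helper above shifts a field value into place for the
 * GB_TILE_MODE* and GB_MACROTILE_MODE* registers.  An illustrative
 * composition (mirroring the tiling-mode tables later in this file):
 *
 *	modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 *			PIPE_CONFIG(ADDR_SURF_P2) |
 *			TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 *			MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
 */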

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

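/*
 * The "golden" register tables below are flat arrays of
 * { register offset, AND mask, OR value } triplets consumed by
 * amdgpu_device_program_register_sequence().  A minimal sketch of the
 * consumer, simplified from the common amdgpu device code:
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff)
 *			tmp = or_mask;
 *		else
 *			tmp = (RREG32(reg) & ~and_mask) | (or_mask & and_mask);
 *		WREG32(reg, tmp);
 *	}
 */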
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

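/*
 * Basic ring liveness test: seed a scratch register with 0xCAFEDEAD,
 * submit a three-dword SET_UCONFIG_REG packet that writes 0xDEADBEEF
 * to it, then poll until the CP has performed the write or the
 * timeout expires.
 */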
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

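/*
 * Indirect buffer test: the same scratch-register handshake as the
 * ring test above, but with the SET_UCONFIG_REG write carried in an
 * IB so the CP's indirect-buffer fetch path is exercised as well;
 * completion is detected through the returned dma_fence instead of
 * by register polling alone.
 */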
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

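	/*
	 * Polaris parts ship two generations of CP microcode; prefer the
	 * newer "_2" images and fall back to the original file names when
	 * they are absent (-ENOENT).  The same pattern repeats for each CP
	 * firmware requested below.
	 */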
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else {
		adev->virt.chained_ib_support = false;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP's jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

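/*
 * Build the clear-state buffer (CSB): a PM4 stream that brackets the
 * SECT_CONTEXT register defaults from the clear-state data with
 * PREAMBLE begin/end markers, programs the raster configuration, and
 * ends with a CLEAR_STATE packet.  The resulting buffer is handed to
 * the RLC as the clear-state indirect buffer.
 */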
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

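/*
 * Carrizo/Stoney RLC power gating needs a copy of every CP engine's
 * jump table (CE, PFP, ME, MEC, plus MEC2 on Carrizo) packed back to
 * back in the cp_table buffer; each table's offset and size come from
 * the corresponding firmware header.
 */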
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

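/*
 * Allocate the RLC buffers: a VRAM BO, sized by gfx_v8_0_get_csb_size(),
 * that holds the clear-state buffer, plus (on Carrizo/Stoney only) the
 * CP jump-table BO that cz_init_cp_jump_table() above fills in.
 */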
1355 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1356 {
1357 	volatile u32 *dst_ptr;
1358 	u32 dws;
1359 	const struct cs_section_def *cs_data;
1360 	int r;
1361 
1362 	adev->gfx.rlc.cs_data = vi_cs_data;
1363 
1364 	cs_data = adev->gfx.rlc.cs_data;
1365 
1366 	if (cs_data) {
1367 		/* clear state block */
1368 		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1369 
1370 		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1371 					      AMDGPU_GEM_DOMAIN_VRAM,
1372 					      &adev->gfx.rlc.clear_state_obj,
1373 					      &adev->gfx.rlc.clear_state_gpu_addr,
1374 					      (void **)&adev->gfx.rlc.cs_ptr);
1375 		if (r) {
1376 			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
1377 			gfx_v8_0_rlc_fini(adev);
1378 			return r;
1379 		}
1380 
1381 		/* set up the cs buffer */
1382 		dst_ptr = adev->gfx.rlc.cs_ptr;
1383 		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1384 		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1385 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1386 	}
1387 
1388 	if ((adev->asic_type == CHIP_CARRIZO) ||
1389 	    (adev->asic_type == CHIP_STONEY)) {
1390 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1391 		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1392 					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1393 					      &adev->gfx.rlc.cp_table_obj,
1394 					      &adev->gfx.rlc.cp_table_gpu_addr,
1395 					      (void **)&adev->gfx.rlc.cp_table_ptr);
1396 		if (r) {
1397 			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1398 			return r;
1399 		}
1400 
1401 		cz_init_cp_jump_table(adev);
1402 
1403 		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1404 		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1405 	}
1406 
1407 	return 0;
1408 }
1409 
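/* Free the MEC HPD EOP BO. */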
1410 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1411 {
1412 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1413 }
1414 
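/*
 * Take ownership of the compute queues this driver will use, then
 * allocate one GFX8_MEC_HPD_SIZE EOP slot per compute ring in GTT and
 * zero it.
 */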
1415 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1416 {
1417 	int r;
1418 	u32 *hpd;
1419 	size_t mec_hpd_size;
1420 
1421 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1422 
1423 	/* take ownership of the relevant compute queues */
1424 	amdgpu_gfx_compute_queue_acquire(adev);
1425 
1426 	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1427 
1428 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1429 				      AMDGPU_GEM_DOMAIN_GTT,
1430 				      &adev->gfx.mec.hpd_eop_obj,
1431 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1432 				      (void **)&hpd);
1433 	if (r) {
1434 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1435 		return r;
1436 	}
1437 
1438 	memset(hpd, 0, mec_hpd_size);
1439 
1440 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1441 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1442 
1443 	return 0;
1444 }
1445 
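/*
 * Hand-assembled GCN3 machine code for the EDC GPR workaround below:
 * the VGPR shader is a run of v_mov_b32 writes (0x7e......) and the
 * SGPR shaders are runs of s_mov_b32 writes (0xbe......), each ending
 * in s_barrier (0xbf8a0000) and s_endpgm (0xbf810000), so every GPR
 * holds a defined value before the ECC counters are armed.
 */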
1446 static const u32 vgpr_init_compute_shader[] =
1447 {
1448 	0x7e000209, 0x7e020208,
1449 	0x7e040207, 0x7e060206,
1450 	0x7e080205, 0x7e0a0204,
1451 	0x7e0c0203, 0x7e0e0202,
1452 	0x7e100201, 0x7e120200,
1453 	0x7e140209, 0x7e160208,
1454 	0x7e180207, 0x7e1a0206,
1455 	0x7e1c0205, 0x7e1e0204,
1456 	0x7e200203, 0x7e220202,
1457 	0x7e240201, 0x7e260200,
1458 	0x7e280209, 0x7e2a0208,
1459 	0x7e2c0207, 0x7e2e0206,
1460 	0x7e300205, 0x7e320204,
1461 	0x7e340203, 0x7e360202,
1462 	0x7e380201, 0x7e3a0200,
1463 	0x7e3c0209, 0x7e3e0208,
1464 	0x7e400207, 0x7e420206,
1465 	0x7e440205, 0x7e460204,
1466 	0x7e480203, 0x7e4a0202,
1467 	0x7e4c0201, 0x7e4e0200,
1468 	0x7e500209, 0x7e520208,
1469 	0x7e540207, 0x7e560206,
1470 	0x7e580205, 0x7e5a0204,
1471 	0x7e5c0203, 0x7e5e0202,
1472 	0x7e600201, 0x7e620200,
1473 	0x7e640209, 0x7e660208,
1474 	0x7e680207, 0x7e6a0206,
1475 	0x7e6c0205, 0x7e6e0204,
1476 	0x7e700203, 0x7e720202,
1477 	0x7e740201, 0x7e760200,
1478 	0x7e780209, 0x7e7a0208,
1479 	0x7e7c0207, 0x7e7e0206,
1480 	0xbf8a0000, 0xbf810000,
1481 };
1482 
1483 static const u32 sgpr_init_compute_shader[] =
1484 {
1485 	0xbe8a0100, 0xbe8c0102,
1486 	0xbe8e0104, 0xbe900106,
1487 	0xbe920108, 0xbe940100,
1488 	0xbe960102, 0xbe980104,
1489 	0xbe9a0106, 0xbe9c0108,
1490 	0xbe9e0100, 0xbea00102,
1491 	0xbea20104, 0xbea40106,
1492 	0xbea60108, 0xbea80100,
1493 	0xbeaa0102, 0xbeac0104,
1494 	0xbeae0106, 0xbeb00108,
1495 	0xbeb20100, 0xbeb40102,
1496 	0xbeb60104, 0xbeb80106,
1497 	0xbeba0108, 0xbebc0100,
1498 	0xbebe0102, 0xbec00104,
1499 	0xbec20106, 0xbec40108,
1500 	0xbec60100, 0xbec80102,
1501 	0xbee60004, 0xbee70005,
1502 	0xbeea0006, 0xbeeb0007,
1503 	0xbee80008, 0xbee90009,
1504 	0xbefc0000, 0xbf8a0000,
1505 	0xbf810000, 0x00000000,
1506 };
1507 
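/*
 * Register/value pairs programmed via PACKET3_SET_SH_REG before each
 * workaround dispatch; gfx_v8_0_do_edc_gpr_workarounds() walks these
 * arrays two entries at a time.
 */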
1508 static const u32 vgpr_init_regs[] =
1509 {
1510 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1511 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1512 	mmCOMPUTE_NUM_THREAD_X, 256*4,
1513 	mmCOMPUTE_NUM_THREAD_Y, 1,
1514 	mmCOMPUTE_NUM_THREAD_Z, 1,
1515 	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1516 	mmCOMPUTE_PGM_RSRC2, 20,
1517 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1518 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1519 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1520 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1521 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1522 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1523 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1524 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1525 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1526 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1527 };
1528 
1529 static const u32 sgpr1_init_regs[] =
1530 {
1531 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1532 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1533 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1534 	mmCOMPUTE_NUM_THREAD_Y, 1,
1535 	mmCOMPUTE_NUM_THREAD_Z, 1,
1536 	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1537 	mmCOMPUTE_PGM_RSRC2, 20,
1538 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1539 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1540 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1541 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1542 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1543 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1544 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1545 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1546 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1547 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1548 };
1549 
1550 static const u32 sgpr2_init_regs[] =
1551 {
1552 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1553 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1554 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1555 	mmCOMPUTE_NUM_THREAD_Y, 1,
1556 	mmCOMPUTE_NUM_THREAD_Z, 1,
1557 	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1558 	mmCOMPUTE_PGM_RSRC2, 20,
1559 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1560 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1561 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1562 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1563 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1564 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1565 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1566 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1567 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1568 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1569 };
1570 
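/*
 * SEC/DED (single-error-correct/double-error-detect) EDC counter
 * registers; reading them back at the end of the workaround clears
 * them.
 */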
1571 static const u32 sec_ded_counter_registers[] =
1572 {
1573 	mmCPC_EDC_ATC_CNT,
1574 	mmCPC_EDC_SCRATCH_CNT,
1575 	mmCPC_EDC_UCODE_CNT,
1576 	mmCPF_EDC_ATC_CNT,
1577 	mmCPF_EDC_ROQ_CNT,
1578 	mmCPF_EDC_TAG_CNT,
1579 	mmCPG_EDC_ATC_CNT,
1580 	mmCPG_EDC_DMA_CNT,
1581 	mmCPG_EDC_TAG_CNT,
1582 	mmDC_EDC_CSINVOC_CNT,
1583 	mmDC_EDC_RESTORE_CNT,
1584 	mmDC_EDC_STATE_CNT,
1585 	mmGDS_EDC_CNT,
1586 	mmGDS_EDC_GRBM_CNT,
1587 	mmGDS_EDC_OA_DED,
1588 	mmSPI_EDC_CNT,
1589 	mmSQC_ATC_EDC_GATCL1_CNT,
1590 	mmSQC_EDC_CNT,
1591 	mmSQ_EDC_DED_CNT,
1592 	mmSQ_EDC_INFO,
1593 	mmSQ_EDC_SEC_CNT,
1594 	mmTCC_EDC_CNT,
1595 	mmTCP_ATC_EDC_GATCL1_CNT,
1596 	mmTCP_EDC_CNT,
1597 	mmTD_EDC_CNT
1598 };
1599 
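/*
 * Carrizo-only EDC workaround: build a single IB that dispatches the
 * VGPR and both SGPR init shaders so every GPR is written once, then
 * enable DED_MODE/PROP_FED in GB_EDC_MODE and clear the EDC counters
 * by reading them back.
 */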
1600 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1601 {
1602 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1603 	struct amdgpu_ib ib;
1604 	struct dma_fence *f = NULL;
1605 	int r, i;
1606 	u32 tmp;
1607 	unsigned total_size, vgpr_offset, sgpr_offset;
1608 	u64 gpu_addr;
1609 
1610 	/* only supported on CZ */
1611 	if (adev->asic_type != CHIP_CARRIZO)
1612 		return 0;
1613 
1614 	/* bail if the compute ring is not ready */
1615 	if (!ring->ready)
1616 		return 0;
1617 
1618 	tmp = RREG32(mmGB_EDC_MODE);
1619 	WREG32(mmGB_EDC_MODE, 0);
1620 
1621 	total_size =
1622 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1623 	total_size +=
1624 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1625 	total_size +=
1626 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1627 	total_size = ALIGN(total_size, 256);
1628 	vgpr_offset = total_size;
1629 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1630 	sgpr_offset = total_size;
1631 	total_size += sizeof(sgpr_init_compute_shader);
1632 
1633 	/* allocate an indirect buffer to put the commands in */
1634 	memset(&ib, 0, sizeof(ib));
1635 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1636 	if (r) {
1637 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1638 		return r;
1639 	}
1640 
1641 	/* load the compute shaders */
1642 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1643 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1644 
1645 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1646 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1647 
1648 	/* init the ib length to 0 */
1649 	ib.length_dw = 0;
1650 
1651 	/* VGPR */
1652 	/* write the register state for the compute dispatch */
1653 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1654 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1655 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1656 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1657 	}
1658 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1660 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1664 
1665 	/* write dispatch packet */
1666 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667 	ib.ptr[ib.length_dw++] = 8; /* x */
1668 	ib.ptr[ib.length_dw++] = 1; /* y */
1669 	ib.ptr[ib.length_dw++] = 1; /* z */
1670 	ib.ptr[ib.length_dw++] =
1671 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1672 
1673 	/* write CS partial flush packet */
1674 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1676 
1677 	/* SGPR1 */
1678 	/* write the register state for the compute dispatch */
1679 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1680 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1681 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1682 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1683 	}
1684 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1685 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1686 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1687 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1688 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1689 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1690 
1691 	/* write dispatch packet */
1692 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1693 	ib.ptr[ib.length_dw++] = 8; /* x */
1694 	ib.ptr[ib.length_dw++] = 1; /* y */
1695 	ib.ptr[ib.length_dw++] = 1; /* z */
1696 	ib.ptr[ib.length_dw++] =
1697 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1698 
1699 	/* write CS partial flush packet */
1700 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1701 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1702 
1703 	/* SGPR2 */
1704 	/* write the register state for the compute dispatch */
1705 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1706 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1707 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1708 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1709 	}
1710 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1711 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1712 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1713 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1714 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1715 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1716 
1717 	/* write dispatch packet */
1718 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1719 	ib.ptr[ib.length_dw++] = 8; /* x */
1720 	ib.ptr[ib.length_dw++] = 1; /* y */
1721 	ib.ptr[ib.length_dw++] = 1; /* z */
1722 	ib.ptr[ib.length_dw++] =
1723 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1724 
1725 	/* write CS partial flush packet */
1726 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1727 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1728 
1729 	/* schedule the ib on the ring */
1730 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1731 	if (r) {
1732 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1733 		goto fail;
1734 	}
1735 
1736 	/* wait for the GPU to finish processing the IB */
1737 	r = dma_fence_wait(f, false);
1738 	if (r) {
1739 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1740 		goto fail;
1741 	}
1742 
1743 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1744 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1745 	WREG32(mmGB_EDC_MODE, tmp);
1746 
1747 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1748 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1749 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1750 
1751 
1752 	/* read back registers to clear the counters */
1753 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1754 		RREG32(sec_ded_counter_registers[i]);
1755 
1756 fail:
1757 	amdgpu_ib_free(adev, &ib, NULL);
1758 	dma_fence_put(f);
1759 
1760 	return r;
1761 }
1762 
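/*
 * Fill adev->gfx.config with the per-ASIC shader-engine/CU/RB limits
 * and derive GB_ADDR_CONFIG, fixing up ROW_SIZE from the detected
 * memory row size (fused DIMM mapping on APUs, MC_ARB_RAMCFG on
 * dGPUs).
 */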
1763 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1764 {
1765 	u32 gb_addr_config;
1766 	u32 mc_shared_chmap, mc_arb_ramcfg;
1767 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1768 	u32 tmp;
1769 	int ret;
1770 
1771 	switch (adev->asic_type) {
1772 	case CHIP_TOPAZ:
1773 		adev->gfx.config.max_shader_engines = 1;
1774 		adev->gfx.config.max_tile_pipes = 2;
1775 		adev->gfx.config.max_cu_per_sh = 6;
1776 		adev->gfx.config.max_sh_per_se = 1;
1777 		adev->gfx.config.max_backends_per_se = 2;
1778 		adev->gfx.config.max_texture_channel_caches = 2;
1779 		adev->gfx.config.max_gprs = 256;
1780 		adev->gfx.config.max_gs_threads = 32;
1781 		adev->gfx.config.max_hw_contexts = 8;
1782 
1783 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1784 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1785 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1786 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1787 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1788 		break;
1789 	case CHIP_FIJI:
1790 		adev->gfx.config.max_shader_engines = 4;
1791 		adev->gfx.config.max_tile_pipes = 16;
1792 		adev->gfx.config.max_cu_per_sh = 16;
1793 		adev->gfx.config.max_sh_per_se = 1;
1794 		adev->gfx.config.max_backends_per_se = 4;
1795 		adev->gfx.config.max_texture_channel_caches = 16;
1796 		adev->gfx.config.max_gprs = 256;
1797 		adev->gfx.config.max_gs_threads = 32;
1798 		adev->gfx.config.max_hw_contexts = 8;
1799 
1800 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1801 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1802 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1803 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1804 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1805 		break;
1806 	case CHIP_POLARIS11:
1807 	case CHIP_POLARIS12:
1808 		ret = amdgpu_atombios_get_gfx_info(adev);
1809 		if (ret)
1810 			return ret;
1811 		adev->gfx.config.max_gprs = 256;
1812 		adev->gfx.config.max_gs_threads = 32;
1813 		adev->gfx.config.max_hw_contexts = 8;
1814 
1815 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1816 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1817 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1818 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1819 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1820 		break;
1821 	case CHIP_POLARIS10:
1822 	case CHIP_VEGAM:
1823 		ret = amdgpu_atombios_get_gfx_info(adev);
1824 		if (ret)
1825 			return ret;
1826 		adev->gfx.config.max_gprs = 256;
1827 		adev->gfx.config.max_gs_threads = 32;
1828 		adev->gfx.config.max_hw_contexts = 8;
1829 
1830 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1831 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1832 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1833 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1834 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1835 		break;
1836 	case CHIP_TONGA:
1837 		adev->gfx.config.max_shader_engines = 4;
1838 		adev->gfx.config.max_tile_pipes = 8;
1839 		adev->gfx.config.max_cu_per_sh = 8;
1840 		adev->gfx.config.max_sh_per_se = 1;
1841 		adev->gfx.config.max_backends_per_se = 2;
1842 		adev->gfx.config.max_texture_channel_caches = 8;
1843 		adev->gfx.config.max_gprs = 256;
1844 		adev->gfx.config.max_gs_threads = 32;
1845 		adev->gfx.config.max_hw_contexts = 8;
1846 
1847 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1851 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1852 		break;
1853 	case CHIP_CARRIZO:
1854 		adev->gfx.config.max_shader_engines = 1;
1855 		adev->gfx.config.max_tile_pipes = 2;
1856 		adev->gfx.config.max_sh_per_se = 1;
1857 		adev->gfx.config.max_backends_per_se = 2;
1858 		adev->gfx.config.max_cu_per_sh = 8;
1859 		adev->gfx.config.max_texture_channel_caches = 2;
1860 		adev->gfx.config.max_gprs = 256;
1861 		adev->gfx.config.max_gs_threads = 32;
1862 		adev->gfx.config.max_hw_contexts = 8;
1863 
1864 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1868 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1869 		break;
1870 	case CHIP_STONEY:
1871 		adev->gfx.config.max_shader_engines = 1;
1872 		adev->gfx.config.max_tile_pipes = 2;
1873 		adev->gfx.config.max_sh_per_se = 1;
1874 		adev->gfx.config.max_backends_per_se = 1;
1875 		adev->gfx.config.max_cu_per_sh = 3;
1876 		adev->gfx.config.max_texture_channel_caches = 2;
1877 		adev->gfx.config.max_gprs = 256;
1878 		adev->gfx.config.max_gs_threads = 16;
1879 		adev->gfx.config.max_hw_contexts = 8;
1880 
1881 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1882 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1883 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1884 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1885 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1886 		break;
1887 	default:
1888 		adev->gfx.config.max_shader_engines = 2;
1889 		adev->gfx.config.max_tile_pipes = 4;
1890 		adev->gfx.config.max_cu_per_sh = 2;
1891 		adev->gfx.config.max_sh_per_se = 1;
1892 		adev->gfx.config.max_backends_per_se = 2;
1893 		adev->gfx.config.max_texture_channel_caches = 4;
1894 		adev->gfx.config.max_gprs = 256;
1895 		adev->gfx.config.max_gs_threads = 32;
1896 		adev->gfx.config.max_hw_contexts = 8;
1897 
1898 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1902 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1903 		break;
1904 	}
1905 
1906 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1907 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1908 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1909 
1910 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1911 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1912 	if (adev->flags & AMD_IS_APU) {
1913 		/* Get memory bank mapping mode. */
1914 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1915 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1916 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1917 
1918 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1919 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1920 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1921 
1922 		/* Validate settings in case only one DIMM is installed. */
1923 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1924 			dimm00_addr_map = 0;
1925 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1926 			dimm01_addr_map = 0;
1927 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1928 			dimm10_addr_map = 0;
1929 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1930 			dimm11_addr_map = 0;
1931 
1932 		/* If the DIMM addr map is 8GB, row size should be 2KB; otherwise 1KB. */
1933 		/* If ROW size(DIMM1) != ROW size(DIMM0), row size should be the larger one. */
1934 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1935 			adev->gfx.config.mem_row_size_in_kb = 2;
1936 		else
1937 			adev->gfx.config.mem_row_size_in_kb = 1;
1938 	} else {
1939 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1940 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1941 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1942 			adev->gfx.config.mem_row_size_in_kb = 4;
1943 	}
1944 
1945 	adev->gfx.config.shader_engine_tile_size = 32;
1946 	adev->gfx.config.num_gpus = 1;
1947 	adev->gfx.config.multi_gpu_tile_size = 64;
1948 
1949 	/* fix up row size */
1950 	switch (adev->gfx.config.mem_row_size_in_kb) {
1951 	case 1:
1952 	default:
1953 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1954 		break;
1955 	case 2:
1956 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1957 		break;
1958 	case 4:
1959 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1960 		break;
1961 	}
1962 	adev->gfx.config.gb_addr_config = gb_addr_config;
1963 
1964 	return 0;
1965 }
1966 
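/* Initialize one compute ring on the given MEC/pipe/queue slot. */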
1967 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1968 					int mec, int pipe, int queue)
1969 {
1970 	int r;
1971 	unsigned irq_type;
1972 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1973 
1975 
1976 	/* mec0 is me1 */
1977 	ring->me = mec + 1;
1978 	ring->pipe = pipe;
1979 	ring->queue = queue;
1980 
1981 	ring->ring_obj = NULL;
1982 	ring->use_doorbell = true;
1983 	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1984 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1985 				+ (ring_id * GFX8_MEC_HPD_SIZE);
1986 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1987 
1988 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1989 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1990 		+ ring->pipe;
1991 
1992 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1993 	r = amdgpu_ring_init(adev, ring, 1024,
1994 			&adev->gfx.eop_irq, irq_type);
1995 	if (r)
1996 		return r;
1997 
1998 
1999 	return 0;
2000 }
2001 
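/*
 * sw_init: register the KIQ/EOP/privileged-reg/privileged-inst
 * interrupt sources, load microcode, create the RLC/MEC/KIQ/MQD BOs,
 * bring up the gfx and compute rings, and reserve the GDS/GWS/OA
 * partitions.
 */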
2002 static int gfx_v8_0_sw_init(void *handle)
2003 {
2004 	int i, j, k, r, ring_id;
2005 	struct amdgpu_ring *ring;
2006 	struct amdgpu_kiq *kiq;
2007 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2008 
2009 	switch (adev->asic_type) {
2010 	case CHIP_TONGA:
2011 	case CHIP_CARRIZO:
2012 	case CHIP_FIJI:
2013 	case CHIP_POLARIS10:
2014 	case CHIP_POLARIS11:
2015 	case CHIP_POLARIS12:
2016 	case CHIP_VEGAM:
2017 		adev->gfx.mec.num_mec = 2;
2018 		break;
2019 	case CHIP_TOPAZ:
2020 	case CHIP_STONEY:
2021 	default:
2022 		adev->gfx.mec.num_mec = 1;
2023 		break;
2024 	}
2025 
2026 	adev->gfx.mec.num_pipe_per_mec = 4;
2027 	adev->gfx.mec.num_queue_per_pipe = 8;
2028 
2029 	/* KIQ event */
2030 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2031 	if (r)
2032 		return r;
2033 
2034 	/* EOP Event */
2035 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2036 	if (r)
2037 		return r;
2038 
2039 	/* Privileged reg */
2040 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2041 			      &adev->gfx.priv_reg_irq);
2042 	if (r)
2043 		return r;
2044 
2045 	/* Privileged inst */
2046 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2047 			      &adev->gfx.priv_inst_irq);
2048 	if (r)
2049 		return r;
2050 
2051 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2052 
2053 	gfx_v8_0_scratch_init(adev);
2054 
2055 	r = gfx_v8_0_init_microcode(adev);
2056 	if (r) {
2057 		DRM_ERROR("Failed to load gfx firmware!\n");
2058 		return r;
2059 	}
2060 
2061 	r = gfx_v8_0_rlc_init(adev);
2062 	if (r) {
2063 		DRM_ERROR("Failed to init rlc BOs!\n");
2064 		return r;
2065 	}
2066 
2067 	r = gfx_v8_0_mec_init(adev);
2068 	if (r) {
2069 		DRM_ERROR("Failed to init MEC BOs!\n");
2070 		return r;
2071 	}
2072 
2073 	/* set up the gfx ring */
2074 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2075 		ring = &adev->gfx.gfx_ring[i];
2076 		ring->ring_obj = NULL;
2077 		sprintf(ring->name, "gfx");
2078 		/* no gfx doorbells on iceland */
2079 		if (adev->asic_type != CHIP_TOPAZ) {
2080 			ring->use_doorbell = true;
2081 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2082 		}
2083 
2084 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2085 				     AMDGPU_CP_IRQ_GFX_EOP);
2086 		if (r)
2087 			return r;
2088 	}
2089 
2090 
2091 	/* set up the compute queues - allocate horizontally across pipes */
2092 	ring_id = 0;
2093 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2094 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2095 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2096 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2097 					continue;
2098 
2099 				r = gfx_v8_0_compute_ring_init(adev,
2100 								ring_id,
2101 								i, k, j);
2102 				if (r)
2103 					return r;
2104 
2105 				ring_id++;
2106 			}
2107 		}
2108 	}
2109 
2110 	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2111 	if (r) {
2112 		DRM_ERROR("Failed to init KIQ BOs!\n");
2113 		return r;
2114 	}
2115 
2116 	kiq = &adev->gfx.kiq;
2117 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2118 	if (r)
2119 		return r;
2120 
2121 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2122 	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2123 	if (r)
2124 		return r;
2125 
2126 	/* reserve GDS, GWS and OA resource for gfx */
2127 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2128 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2129 				    &adev->gds.gds_gfx_bo, NULL, NULL);
2130 	if (r)
2131 		return r;
2132 
2133 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2134 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2135 				    &adev->gds.gws_gfx_bo, NULL, NULL);
2136 	if (r)
2137 		return r;
2138 
2139 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2140 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2141 				    &adev->gds.oa_gfx_bo, NULL, NULL);
2142 	if (r)
2143 		return r;
2144 
2145 	adev->gfx.ce_ram_size = 0x8000;
2146 
2147 	r = gfx_v8_0_gpu_early_init(adev);
2148 	if (r)
2149 		return r;
2150 
2151 	return 0;
2152 }
2153 
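/* sw_fini: undo sw_init, tearing resources down in reverse order. */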
2154 static int gfx_v8_0_sw_fini(void *handle)
2155 {
2156 	int i;
2157 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2158 
2159 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2160 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2161 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2162 
2163 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2164 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2165 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2166 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2167 
2168 	amdgpu_gfx_compute_mqd_sw_fini(adev);
2169 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2170 	amdgpu_gfx_kiq_fini(adev);
2171 
2172 	gfx_v8_0_mec_fini(adev);
2173 	gfx_v8_0_rlc_fini(adev);
2174 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2175 				&adev->gfx.rlc.clear_state_gpu_addr,
2176 				(void **)&adev->gfx.rlc.cs_ptr);
2177 	if ((adev->asic_type == CHIP_CARRIZO) ||
2178 	    (adev->asic_type == CHIP_STONEY)) {
2179 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2180 				&adev->gfx.rlc.cp_table_gpu_addr,
2181 				(void **)&adev->gfx.rlc.cp_table_ptr);
2182 	}
2183 	gfx_v8_0_free_microcode(adev);
2184 
2185 	return 0;
2186 }
2187 
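/*
 * Program the per-ASIC golden values into the GB_TILE_MODE and
 * GB_MACROTILE_MODE register tables; indices skipped in the write
 * loops below (e.g. 7, and additionally 12/17/23 on Topaz) are left
 * untouched.
 */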
2188 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2189 {
2190 	uint32_t *modearray, *mod2array;
2191 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2192 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2193 	u32 reg_offset;
2194 
2195 	modearray = adev->gfx.config.tile_mode_array;
2196 	mod2array = adev->gfx.config.macrotile_mode_array;
2197 
2198 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2199 		modearray[reg_offset] = 0;
2200 
2201 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2202 		mod2array[reg_offset] = 0;
2203 
2204 	switch (adev->asic_type) {
2205 	case CHIP_TOPAZ:
2206 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2207 				PIPE_CONFIG(ADDR_SURF_P2) |
2208 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2209 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2210 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211 				PIPE_CONFIG(ADDR_SURF_P2) |
2212 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2213 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2214 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2215 				PIPE_CONFIG(ADDR_SURF_P2) |
2216 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2217 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2218 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219 				PIPE_CONFIG(ADDR_SURF_P2) |
2220 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2221 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2222 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2223 				PIPE_CONFIG(ADDR_SURF_P2) |
2224 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2225 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2226 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2227 				PIPE_CONFIG(ADDR_SURF_P2) |
2228 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2229 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2230 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2231 				PIPE_CONFIG(ADDR_SURF_P2) |
2232 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2233 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2234 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2235 				PIPE_CONFIG(ADDR_SURF_P2));
2236 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2237 				PIPE_CONFIG(ADDR_SURF_P2) |
2238 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2239 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2240 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241 				 PIPE_CONFIG(ADDR_SURF_P2) |
2242 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2243 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2245 				 PIPE_CONFIG(ADDR_SURF_P2) |
2246 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2247 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2248 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 				 PIPE_CONFIG(ADDR_SURF_P2) |
2250 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2251 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2252 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253 				 PIPE_CONFIG(ADDR_SURF_P2) |
2254 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2255 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2256 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2257 				 PIPE_CONFIG(ADDR_SURF_P2) |
2258 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2259 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2260 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2261 				 PIPE_CONFIG(ADDR_SURF_P2) |
2262 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2263 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2264 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2265 				 PIPE_CONFIG(ADDR_SURF_P2) |
2266 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2267 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2268 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2269 				 PIPE_CONFIG(ADDR_SURF_P2) |
2270 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2271 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2272 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2273 				 PIPE_CONFIG(ADDR_SURF_P2) |
2274 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2275 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2276 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2277 				 PIPE_CONFIG(ADDR_SURF_P2) |
2278 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2279 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2280 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2281 				 PIPE_CONFIG(ADDR_SURF_P2) |
2282 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2283 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2284 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2285 				 PIPE_CONFIG(ADDR_SURF_P2) |
2286 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2288 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2289 				 PIPE_CONFIG(ADDR_SURF_P2) |
2290 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2291 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2292 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2293 				 PIPE_CONFIG(ADDR_SURF_P2) |
2294 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2295 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2296 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2297 				 PIPE_CONFIG(ADDR_SURF_P2) |
2298 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2299 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2300 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2301 				 PIPE_CONFIG(ADDR_SURF_P2) |
2302 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2303 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2305 				 PIPE_CONFIG(ADDR_SURF_P2) |
2306 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2307 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2308 
2309 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2310 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2311 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312 				NUM_BANKS(ADDR_SURF_8_BANK));
2313 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2314 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2315 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2316 				NUM_BANKS(ADDR_SURF_8_BANK));
2317 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2318 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2319 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2320 				NUM_BANKS(ADDR_SURF_8_BANK));
2321 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2323 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2324 				NUM_BANKS(ADDR_SURF_8_BANK));
2325 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2326 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2327 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2328 				NUM_BANKS(ADDR_SURF_8_BANK));
2329 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2330 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2331 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2332 				NUM_BANKS(ADDR_SURF_8_BANK));
2333 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2334 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2335 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2336 				NUM_BANKS(ADDR_SURF_8_BANK));
2337 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2338 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2339 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2340 				NUM_BANKS(ADDR_SURF_16_BANK));
2341 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2342 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2343 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2344 				NUM_BANKS(ADDR_SURF_16_BANK));
2345 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2346 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2347 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2348 				 NUM_BANKS(ADDR_SURF_16_BANK));
2349 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2350 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2352 				 NUM_BANKS(ADDR_SURF_16_BANK));
2353 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2355 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2356 				 NUM_BANKS(ADDR_SURF_16_BANK));
2357 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360 				 NUM_BANKS(ADDR_SURF_16_BANK));
2361 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364 				 NUM_BANKS(ADDR_SURF_8_BANK));
2365 
2366 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2367 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2368 			    reg_offset != 23)
2369 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2370 
2371 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2372 			if (reg_offset != 7)
2373 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2374 
2375 		break;
2376 	case CHIP_FIJI:
2377 	case CHIP_VEGAM:
2378 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2381 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2385 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2386 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2389 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2390 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2391 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2393 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2394 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2397 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2398 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2399 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2401 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2402 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2403 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2405 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2408 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2409 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2410 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2411 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2412 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2429 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2435 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2437 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2439 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2443 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2448 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2449 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2452 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2453 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2455 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2456 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2457 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2459 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2460 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2461 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2463 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2464 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2465 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2467 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2468 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2469 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2470 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2471 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2472 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2473 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2475 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2476 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2477 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2479 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2480 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2481 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2483 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2484 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2485 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2487 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2489 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2491 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2495 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2497 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2498 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2499 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2500 
2501 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504 				NUM_BANKS(ADDR_SURF_8_BANK));
2505 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2508 				NUM_BANKS(ADDR_SURF_8_BANK));
2509 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512 				NUM_BANKS(ADDR_SURF_8_BANK));
2513 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2515 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2516 				NUM_BANKS(ADDR_SURF_8_BANK));
2517 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520 				NUM_BANKS(ADDR_SURF_8_BANK));
2521 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 				NUM_BANKS(ADDR_SURF_8_BANK));
2525 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2527 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2528 				NUM_BANKS(ADDR_SURF_8_BANK));
2529 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2531 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2532 				NUM_BANKS(ADDR_SURF_8_BANK));
2533 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2535 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2536 				NUM_BANKS(ADDR_SURF_8_BANK));
2537 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2539 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2540 				 NUM_BANKS(ADDR_SURF_8_BANK));
2541 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2543 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2544 				 NUM_BANKS(ADDR_SURF_8_BANK));
2545 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2547 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 				 NUM_BANKS(ADDR_SURF_8_BANK));
2549 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2551 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2552 				 NUM_BANKS(ADDR_SURF_8_BANK));
2553 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2555 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2556 				 NUM_BANKS(ADDR_SURF_4_BANK));
2557 
2558 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2559 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2560 
2561 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2562 			if (reg_offset != 7)
2563 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2564 
2565 		break;
2566 	case CHIP_TONGA:
2567 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2570 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2574 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2575 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2578 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2579 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2582 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2583 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2585 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2586 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2587 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2590 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2591 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2592 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2594 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2596 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2597 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2598 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2599 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2600 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2601 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2613 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2617 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2618 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2624 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2626 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2628 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2633 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2636 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2637 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2638 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2639 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2640 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2641 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2642 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2644 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2645 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2646 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2647 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2648 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2649 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2650 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2651 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2652 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2653 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2654 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2656 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2657 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2658 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2660 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2661 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2662 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2665 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2666 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2668 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2669 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2670 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2672 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2673 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2680 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2681 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2684 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2685 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2688 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2689 
2690 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693 				NUM_BANKS(ADDR_SURF_16_BANK));
2694 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2695 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2696 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2697 				NUM_BANKS(ADDR_SURF_16_BANK));
2698 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2700 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2701 				NUM_BANKS(ADDR_SURF_16_BANK));
2702 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2704 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2705 				NUM_BANKS(ADDR_SURF_16_BANK));
2706 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2708 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2709 				NUM_BANKS(ADDR_SURF_16_BANK));
2710 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2712 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2713 				NUM_BANKS(ADDR_SURF_16_BANK));
2714 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2717 				NUM_BANKS(ADDR_SURF_16_BANK));
2718 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2720 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2721 				NUM_BANKS(ADDR_SURF_16_BANK));
2722 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2724 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2725 				NUM_BANKS(ADDR_SURF_16_BANK));
2726 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2728 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729 				 NUM_BANKS(ADDR_SURF_16_BANK));
2730 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2732 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2733 				 NUM_BANKS(ADDR_SURF_16_BANK));
2734 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2736 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2737 				 NUM_BANKS(ADDR_SURF_8_BANK));
2738 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2741 				 NUM_BANKS(ADDR_SURF_4_BANK));
2742 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2744 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2745 				 NUM_BANKS(ADDR_SURF_4_BANK));
2746 
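		/*
		 * Flush the tables: each tile mode word lands at
		 * mmGB_TILE_MODE0 + index and each macrotile word at
		 * mmGB_MACROTILE_MODE0 + index.  Index 7 of mod2array[] is
		 * never populated, so the second loop skips it and
		 * presumably leaves the hardware default in place.
		 */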
2747 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2748 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2749 
2750 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2751 			if (reg_offset != 7)
2752 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2753 
2754 		break;
2755 	case CHIP_POLARIS11:
2756 	case CHIP_POLARIS12:
2757 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2760 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2761 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2764 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2765 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2768 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2769 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2772 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2773 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2776 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2777 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2778 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2780 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2781 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2782 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2784 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2785 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2786 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2788 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2789 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2790 				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2791 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2802 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2806 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2808 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2810 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2814 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2816 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2819 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2822 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2823 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2826 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2827 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2828 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2829 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2830 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2831 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2832 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2834 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2835 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2836 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2838 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2839 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2840 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2842 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2843 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2844 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2846 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2847 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2848 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2850 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2851 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2852 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2855 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2856 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2858 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2859 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2860 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2862 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2863 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2864 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2870 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2871 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2874 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2875 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2876 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2878 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2879 
2880 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2882 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2883 				NUM_BANKS(ADDR_SURF_16_BANK));
2884 
2885 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2887 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2888 				NUM_BANKS(ADDR_SURF_16_BANK));
2889 
2890 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2892 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2893 				NUM_BANKS(ADDR_SURF_16_BANK));
2894 
2895 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2897 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2898 				NUM_BANKS(ADDR_SURF_16_BANK));
2899 
2900 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2902 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2903 				NUM_BANKS(ADDR_SURF_16_BANK));
2904 
2905 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2907 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2908 				NUM_BANKS(ADDR_SURF_16_BANK));
2909 
2910 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2912 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2913 				NUM_BANKS(ADDR_SURF_16_BANK));
2914 
2915 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2916 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2917 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918 				NUM_BANKS(ADDR_SURF_16_BANK));
2919 
2920 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2921 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2922 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923 				NUM_BANKS(ADDR_SURF_16_BANK));
2924 
2925 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2927 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2928 				NUM_BANKS(ADDR_SURF_16_BANK));
2929 
2930 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2932 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933 				NUM_BANKS(ADDR_SURF_16_BANK));
2934 
2935 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2936 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2937 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2938 				NUM_BANKS(ADDR_SURF_16_BANK));
2939 
2940 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2941 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2942 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2943 				NUM_BANKS(ADDR_SURF_8_BANK));
2944 
2945 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2947 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2948 				NUM_BANKS(ADDR_SURF_4_BANK));
2949 
2950 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2951 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2952 
2953 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2954 			if (reg_offset != 7)
2955 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2956 
2957 		break;
2958 	case CHIP_POLARIS10:
2959 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2960 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2962 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2963 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2966 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2967 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2968 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2970 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2971 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2974 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2975 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2978 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2979 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2980 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2982 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2983 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2984 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2986 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2987 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2988 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2989 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2990 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2991 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2992 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2993 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3000 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3004 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3008 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3010 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3012 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3013 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3016 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3018 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3020 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3024 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3028 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3029 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3030 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3031 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3033 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3034 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3035 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3036 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3037 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3038 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3039 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3040 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3041 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3042 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3043 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3044 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3045 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3046 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3048 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3049 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3050 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3051 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3052 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3053 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3054 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3056 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3057 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3058 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3060 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3061 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3062 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3064 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3065 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3066 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3068 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3070 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3072 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3073 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3074 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3076 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3077 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3078 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3079 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3080 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3081 
3082 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3084 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3085 				NUM_BANKS(ADDR_SURF_16_BANK));
3086 
3087 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3089 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090 				NUM_BANKS(ADDR_SURF_16_BANK));
3091 
3092 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3094 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3095 				NUM_BANKS(ADDR_SURF_16_BANK));
3096 
3097 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3098 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3099 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3100 				NUM_BANKS(ADDR_SURF_16_BANK));
3101 
3102 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3104 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105 				NUM_BANKS(ADDR_SURF_16_BANK));
3106 
3107 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3108 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3109 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3110 				NUM_BANKS(ADDR_SURF_16_BANK));
3111 
3112 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3115 				NUM_BANKS(ADDR_SURF_16_BANK));
3116 
3117 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3119 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3120 				NUM_BANKS(ADDR_SURF_16_BANK));
3121 
3122 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3124 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3125 				NUM_BANKS(ADDR_SURF_16_BANK));
3126 
3127 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3128 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3129 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3130 				NUM_BANKS(ADDR_SURF_16_BANK));
3131 
3132 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3133 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3134 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3135 				NUM_BANKS(ADDR_SURF_16_BANK));
3136 
3137 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3138 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3139 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3140 				NUM_BANKS(ADDR_SURF_8_BANK));
3141 
3142 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3143 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3144 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3145 				NUM_BANKS(ADDR_SURF_4_BANK));
3146 
3147 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3150 				NUM_BANKS(ADDR_SURF_4_BANK));
3151 
3152 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3153 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3154 
3155 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3156 			if (reg_offset != 7)
3157 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3158 
3159 		break;
3160 	case CHIP_STONEY:
3161 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3162 				PIPE_CONFIG(ADDR_SURF_P2) |
3163 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3164 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3165 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3166 				PIPE_CONFIG(ADDR_SURF_P2) |
3167 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3168 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3169 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170 				PIPE_CONFIG(ADDR_SURF_P2) |
3171 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3172 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3173 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174 				PIPE_CONFIG(ADDR_SURF_P2) |
3175 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3176 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3177 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178 				PIPE_CONFIG(ADDR_SURF_P2) |
3179 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3180 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3181 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3182 				PIPE_CONFIG(ADDR_SURF_P2) |
3183 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3184 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3185 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3186 				PIPE_CONFIG(ADDR_SURF_P2) |
3187 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3188 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3189 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3190 				PIPE_CONFIG(ADDR_SURF_P2));
3191 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3192 				PIPE_CONFIG(ADDR_SURF_P2) |
3193 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3194 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3195 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3196 				 PIPE_CONFIG(ADDR_SURF_P2) |
3197 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3198 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3200 				 PIPE_CONFIG(ADDR_SURF_P2) |
3201 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3202 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3203 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3204 				 PIPE_CONFIG(ADDR_SURF_P2) |
3205 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3206 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208 				 PIPE_CONFIG(ADDR_SURF_P2) |
3209 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3210 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3212 				 PIPE_CONFIG(ADDR_SURF_P2) |
3213 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3214 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3216 				 PIPE_CONFIG(ADDR_SURF_P2) |
3217 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3218 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3219 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3220 				 PIPE_CONFIG(ADDR_SURF_P2) |
3221 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3222 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3223 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3224 				 PIPE_CONFIG(ADDR_SURF_P2) |
3225 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3226 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3227 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3228 				 PIPE_CONFIG(ADDR_SURF_P2) |
3229 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3230 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3231 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3232 				 PIPE_CONFIG(ADDR_SURF_P2) |
3233 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3234 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3235 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3236 				 PIPE_CONFIG(ADDR_SURF_P2) |
3237 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3238 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3239 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3240 				 PIPE_CONFIG(ADDR_SURF_P2) |
3241 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3242 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3243 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3244 				 PIPE_CONFIG(ADDR_SURF_P2) |
3245 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3246 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3247 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3248 				 PIPE_CONFIG(ADDR_SURF_P2) |
3249 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3250 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3251 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3252 				 PIPE_CONFIG(ADDR_SURF_P2) |
3253 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3255 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3256 				 PIPE_CONFIG(ADDR_SURF_P2) |
3257 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3258 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3259 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3260 				 PIPE_CONFIG(ADDR_SURF_P2) |
3261 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3262 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3263 
3264 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3266 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267 				NUM_BANKS(ADDR_SURF_8_BANK));
3268 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3270 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3271 				NUM_BANKS(ADDR_SURF_8_BANK));
3272 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3274 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3275 				NUM_BANKS(ADDR_SURF_8_BANK));
3276 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3279 				NUM_BANKS(ADDR_SURF_8_BANK));
3280 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283 				NUM_BANKS(ADDR_SURF_8_BANK));
3284 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3285 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3286 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3287 				NUM_BANKS(ADDR_SURF_8_BANK));
3288 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3289 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3290 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3291 				NUM_BANKS(ADDR_SURF_8_BANK));
3292 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3293 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3294 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295 				NUM_BANKS(ADDR_SURF_16_BANK));
3296 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3297 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3298 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3299 				NUM_BANKS(ADDR_SURF_16_BANK));
3300 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3301 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 				 NUM_BANKS(ADDR_SURF_16_BANK));
3304 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 				 NUM_BANKS(ADDR_SURF_16_BANK));
3308 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3310 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311 				 NUM_BANKS(ADDR_SURF_16_BANK));
3312 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315 				 NUM_BANKS(ADDR_SURF_16_BANK));
3316 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3319 				 NUM_BANKS(ADDR_SURF_8_BANK));
3320 
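		/*
		 * Stoney is a 2-pipe (ADDR_SURF_P2) part: tile mode slots
		 * 7, 12, 17 and 23 were never populated in modearray[]
		 * above, so the loop below skips them and leaves the
		 * hardware defaults untouched (CHIP_CARRIZO below follows
		 * the same pattern).
		 */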
3321 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3322 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3323 			    reg_offset != 23)
3324 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3325 
3326 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3327 			if (reg_offset != 7)
3328 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3329 
3330 		break;
3331 	default:
3332 		dev_warn(adev->dev,
3333 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3334 			 adev->asic_type);
3335 		/* fall through */
3336 	case CHIP_CARRIZO:
3337 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3338 				PIPE_CONFIG(ADDR_SURF_P2) |
3339 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3340 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3341 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342 				PIPE_CONFIG(ADDR_SURF_P2) |
3343 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3344 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3345 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3346 				PIPE_CONFIG(ADDR_SURF_P2) |
3347 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3348 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3349 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3350 				PIPE_CONFIG(ADDR_SURF_P2) |
3351 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3352 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3353 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3354 				PIPE_CONFIG(ADDR_SURF_P2) |
3355 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3356 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3357 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3358 				PIPE_CONFIG(ADDR_SURF_P2) |
3359 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3360 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3361 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3362 				PIPE_CONFIG(ADDR_SURF_P2) |
3363 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3364 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3365 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3366 				PIPE_CONFIG(ADDR_SURF_P2));
3367 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3368 				PIPE_CONFIG(ADDR_SURF_P2) |
3369 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3370 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3371 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3372 				 PIPE_CONFIG(ADDR_SURF_P2) |
3373 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3374 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3375 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3376 				 PIPE_CONFIG(ADDR_SURF_P2) |
3377 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3378 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3379 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3380 				 PIPE_CONFIG(ADDR_SURF_P2) |
3381 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3382 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384 				 PIPE_CONFIG(ADDR_SURF_P2) |
3385 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3386 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3387 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3388 				 PIPE_CONFIG(ADDR_SURF_P2) |
3389 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3390 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3391 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3392 				 PIPE_CONFIG(ADDR_SURF_P2) |
3393 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3394 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3395 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3396 				 PIPE_CONFIG(ADDR_SURF_P2) |
3397 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3398 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3399 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3400 				 PIPE_CONFIG(ADDR_SURF_P2) |
3401 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3402 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3403 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3404 				 PIPE_CONFIG(ADDR_SURF_P2) |
3405 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3406 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3407 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3408 				 PIPE_CONFIG(ADDR_SURF_P2) |
3409 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3410 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3411 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3412 				 PIPE_CONFIG(ADDR_SURF_P2) |
3413 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3414 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3415 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3416 				 PIPE_CONFIG(ADDR_SURF_P2) |
3417 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3418 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3419 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3420 				 PIPE_CONFIG(ADDR_SURF_P2) |
3421 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3422 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3423 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3424 				 PIPE_CONFIG(ADDR_SURF_P2) |
3425 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3426 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3427 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3428 				 PIPE_CONFIG(ADDR_SURF_P2) |
3429 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3430 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3431 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3432 				 PIPE_CONFIG(ADDR_SURF_P2) |
3433 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3434 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3436 				 PIPE_CONFIG(ADDR_SURF_P2) |
3437 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3438 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3439 
3440 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3442 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3443 				NUM_BANKS(ADDR_SURF_8_BANK));
3444 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3445 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3446 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3447 				NUM_BANKS(ADDR_SURF_8_BANK));
3448 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3449 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3450 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3451 				NUM_BANKS(ADDR_SURF_8_BANK));
3452 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3453 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3454 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3455 				NUM_BANKS(ADDR_SURF_8_BANK));
3456 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3457 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3458 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3459 				NUM_BANKS(ADDR_SURF_8_BANK));
3460 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3461 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3462 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3463 				NUM_BANKS(ADDR_SURF_8_BANK));
3464 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3465 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3466 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3467 				NUM_BANKS(ADDR_SURF_8_BANK));
3468 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3469 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3470 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3471 				NUM_BANKS(ADDR_SURF_16_BANK));
3472 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3473 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3474 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3475 				NUM_BANKS(ADDR_SURF_16_BANK));
3476 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3477 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3478 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479 				 NUM_BANKS(ADDR_SURF_16_BANK));
3480 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3481 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483 				 NUM_BANKS(ADDR_SURF_16_BANK));
3484 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3486 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3487 				 NUM_BANKS(ADDR_SURF_16_BANK));
3488 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3491 				 NUM_BANKS(ADDR_SURF_16_BANK));
3492 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3494 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3495 				 NUM_BANKS(ADDR_SURF_8_BANK));
3496 
3497 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3498 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3499 			    reg_offset != 23)
3500 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3501 
3502 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3503 			if (reg_offset != 7)
3504 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3505 
3506 		break;
3507 	}
3508 }
3509 
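/*
 * Steer subsequent banked register accesses at a single shader engine
 * (SE) / shader array (SH) / instance via GRBM_GFX_INDEX, or at all of
 * them when 0xffffffff is passed.  Callers serialize on
 * adev->grbm_idx_mutex and restore broadcast mode afterwards, e.g.:
 *
 *	gfx_v8_0_select_se_sh(adev, se, sh, 0xffffffff);
 *	... access per-SE/SH registers ...
 *	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 */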
3510 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3511 				  u32 se_num, u32 sh_num, u32 instance)
3512 {
3513 	u32 data;
3514 
3515 	if (instance == 0xffffffff)
3516 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3517 	else
3518 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3519 
3520 	if (se_num == 0xffffffff)
3521 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3522 	else
3523 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3524 
3525 	if (sh_num == 0xffffffff)
3526 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3527 	else
3528 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3529 
3530 	WREG32(mmGRBM_GFX_INDEX, data);
3531 }
3532 
3533 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3534 				  u32 me, u32 pipe, u32 q)
3535 {
3536 	vi_srbm_select(adev, me, pipe, q, 0);
3537 }
3538 
3539 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
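/*
 * Return the bitmap of active render backends (RBs) for the currently
 * selected SE/SH: the disable bits fused off in CC_RB_BACKEND_DISABLE
 * and those set by the driver in GC_USER_RB_BACKEND_DISABLE are OR'd,
 * inverted and masked to the per-SH width.  E.g. with 4 backends per
 * SE and 1 SH per SE the mask is 0xf, so a part with one RB harvested
 * would typically return 0x7, 0xb, 0xd or 0xe.
 */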
3540 {
3541 	u32 data, mask;
3542 
3543 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3544 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3545 
3546 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3547 
3548 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3549 					 adev->gfx.config.max_sh_per_se);
3550 
3551 	return (~data) & mask;
3552 }
3553 
3554 static void
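/*
 * Per-ASIC golden values for PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1.
 * The RB_MAP_*, PKR_* and SE_* macros encode fields of those registers,
 * describing how render backends, packers and shader engines are mapped
 * for a fully populated (non-harvested) part.
 */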
3555 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3556 {
3557 	switch (adev->asic_type) {
3558 	case CHIP_FIJI:
3559 	case CHIP_VEGAM:
3560 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3561 			  RB_XSEL2(1) | PKR_MAP(2) |
3562 			  PKR_XSEL(1) | PKR_YSEL(1) |
3563 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3564 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3565 			   SE_PAIR_YSEL(2);
3566 		break;
3567 	case CHIP_TONGA:
3568 	case CHIP_POLARIS10:
3569 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3570 			  SE_XSEL(1) | SE_YSEL(1);
3571 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3572 			   SE_PAIR_YSEL(2);
3573 		break;
3574 	case CHIP_TOPAZ:
3575 	case CHIP_CARRIZO:
3576 		*rconf |= RB_MAP_PKR0(2);
3577 		*rconf1 |= 0x0;
3578 		break;
3579 	case CHIP_POLARIS11:
3580 	case CHIP_POLARIS12:
3581 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3582 			  SE_XSEL(1) | SE_YSEL(1);
3583 		*rconf1 |= 0x0;
3584 		break;
3585 	case CHIP_STONEY:
3586 		*rconf |= 0x0;
3587 		*rconf1 |= 0x0;
3588 		break;
3589 	default:
3590 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3591 		break;
3592 	}
3593 }
3594 
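/*
 * Rewrite the raster config for parts with harvested (fused-off) RBs;
 * rb_mask holds one bit per physical RB.  As a worked example, take
 * num_rb = 4, num_se = 2 and sh_per_se = 1: then rb_per_se = 2 and
 * rb_per_pkr = 2, se_mask[0] = 0x3 & rb_mask covers SE0's RBs and,
 * when both of those are present, se_mask[1] = 0xc & rb_mask covers
 * SE1's.  Each SE is then programmed individually through
 * GRBM_GFX_INDEX with SE/PKR/RB maps that steer work away from the
 * missing backends.
 */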
3595 static void
3596 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3597 					u32 raster_config, u32 raster_config_1,
3598 					unsigned rb_mask, unsigned num_rb)
3599 {
3600 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3601 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3602 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3603 	unsigned rb_per_se = num_rb / num_se;
3604 	unsigned se_mask[4];
3605 	unsigned se;
3606 
3607 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3608 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3609 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3610 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3611 
3612 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3613 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3614 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3615 
3616 	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3617 			     (!se_mask[2] && !se_mask[3]))) {
3618 		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3619 
3620 		if (!se_mask[0] && !se_mask[1]) {
3621 			raster_config_1 |=
3622 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3623 		} else {
3624 			raster_config_1 |=
3625 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3626 		}
3627 	}
3628 
3629 	for (se = 0; se < num_se; se++) {
3630 		unsigned raster_config_se = raster_config;
3631 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3632 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3633 		int idx = (se / 2) * 2;
3634 
3635 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3636 			raster_config_se &= ~SE_MAP_MASK;
3637 
3638 			if (!se_mask[idx]) {
3639 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3640 			} else {
3641 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3642 			}
3643 		}
3644 
3645 		pkr0_mask &= rb_mask;
3646 		pkr1_mask &= rb_mask;
3647 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3648 			raster_config_se &= ~PKR_MAP_MASK;
3649 
3650 			if (!pkr0_mask) {
3651 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3652 			} else {
3653 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3654 			}
3655 		}
3656 
3657 		if (rb_per_se >= 2) {
3658 			unsigned rb0_mask = 1 << (se * rb_per_se);
3659 			unsigned rb1_mask = rb0_mask << 1;
3660 
3661 			rb0_mask &= rb_mask;
3662 			rb1_mask &= rb_mask;
3663 			if (!rb0_mask || !rb1_mask) {
3664 				raster_config_se &= ~RB_MAP_PKR0_MASK;
3665 
3666 				if (!rb0_mask) {
3667 					raster_config_se |=
3668 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3669 				} else {
3670 					raster_config_se |=
3671 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3672 				}
3673 			}
3674 
3675 			if (rb_per_se > 2) {
3676 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3677 				rb1_mask = rb0_mask << 1;
3678 				rb0_mask &= rb_mask;
3679 				rb1_mask &= rb_mask;
3680 				if (!rb0_mask || !rb1_mask) {
3681 					raster_config_se &= ~RB_MAP_PKR1_MASK;
3682 
3683 					if (!rb0_mask) {
3684 						raster_config_se |=
3685 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3686 					} else {
3687 						raster_config_se |=
3688 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3689 					}
3690 				}
3691 			}
3692 		}
3693 
3694 		/* GRBM_GFX_INDEX has a different offset on VI */
3695 		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3696 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3697 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3698 	}
3699 
3700 	/* GRBM_GFX_INDEX has a different offset on VI */
3701 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3702 }
3703 
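/*
 * Probe the active RBs on every SE/SH, then either write the golden
 * raster config directly (no usable backend is missing) or fall back
 * to the per-SE harvested path above.  The resulting register values
 * are cached in adev->gfx.config.rb_config[][] for later reporting to
 * userspace.
 */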
3704 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3705 {
3706 	int i, j;
3707 	u32 data;
3708 	u32 raster_config = 0, raster_config_1 = 0;
3709 	u32 active_rbs = 0;
3710 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3711 					adev->gfx.config.max_sh_per_se;
3712 	unsigned num_rb_pipes;
3713 
3714 	mutex_lock(&adev->grbm_idx_mutex);
3715 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3716 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3717 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3718 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3719 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3720 					       rb_bitmap_width_per_sh);
3721 		}
3722 	}
3723 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3724 
3725 	adev->gfx.config.backend_enable_mask = active_rbs;
3726 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3727 
3728 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3729 			     adev->gfx.config.max_shader_engines, 16);
3730 
3731 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3732 
3733 	if (!adev->gfx.config.backend_enable_mask ||
3734 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3735 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3736 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3737 	} else {
3738 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3739 							adev->gfx.config.backend_enable_mask,
3740 							num_rb_pipes);
3741 	}
3742 
3743 	/* cache the values for userspace */
3744 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3745 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3746 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3747 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3748 				RREG32(mmCC_RB_BACKEND_DISABLE);
3749 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3750 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3751 			adev->gfx.config.rb_config[i][j].raster_config =
3752 				RREG32(mmPA_SC_RASTER_CONFIG);
3753 			adev->gfx.config.rb_config[i][j].raster_config_1 =
3754 				RREG32(mmPA_SC_RASTER_CONFIG_1);
3755 		}
3756 	}
3757 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3758 	mutex_unlock(&adev->grbm_idx_mutex);
3759 }
3760 
3761 /**
3762  * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
3763  *
3764  * @adev: amdgpu_device pointer
3765  *
3766  * Initialize the SH_MEM_CONFIG and SH_MEM_BASES registers for the
3767  * compute VMIDs (FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID - 1).
3768  */
3769 #define DEFAULT_SH_MEM_BASES	(0x6000)
3770 #define FIRST_COMPUTE_VMID	(8)
3771 #define LAST_COMPUTE_VMID	(16)
3772 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3773 {
3774 	int i;
3775 	uint32_t sh_mem_config;
3776 	uint32_t sh_mem_bases;
3777 
3778 	/*
3779 	 * Configure apertures:
3780 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3781 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3782 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3783 	 */
3784 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
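	/*
	 * SH_MEM_BASES packs two 16-bit aperture selectors (private in
	 * the low half, shared in the high half); in HSA64 mode each
	 * selector appears to supply bits 63:48 of the aperture base, so
	 * 0x6000 << 48 gives the 0x60000000'00000000 bases listed above.
	 */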
3785 
3786 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3787 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3788 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3789 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3790 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3791 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3792 
3793 	mutex_lock(&adev->srbm_mutex);
3794 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3795 		vi_srbm_select(adev, 0, 0, 0, i);
3796 		/* CP and shaders */
3797 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3798 		WREG32(mmSH_MEM_APE1_BASE, 1);
3799 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3800 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3801 	}
3802 	vi_srbm_select(adev, 0, 0, 0, 0);
3803 	mutex_unlock(&adev->srbm_mutex);
3804 }
3805 
3806 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3807 {
3808 	switch (adev->asic_type) {
3809 	default:
3810 		adev->gfx.config.double_offchip_lds_buf = 1;
3811 		break;
3812 	case CHIP_CARRIZO:
3813 	case CHIP_STONEY:
3814 		adev->gfx.config.double_offchip_lds_buf = 0;
3815 		break;
3816 	}
3817 }
3818 
3819 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3820 {
3821 	u32 tmp, sh_static_mem_cfg;
3822 	int i;
3823 
3824 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3825 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3826 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3827 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3828 
3829 	gfx_v8_0_tiling_mode_table_init(adev);
3830 	gfx_v8_0_setup_rb(adev);
3831 	gfx_v8_0_get_cu_info(adev);
3832 	gfx_v8_0_config_init(adev);
3833 
3834 	/* XXX SH_MEM regs */
3835 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3836 	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3837 				   SWIZZLE_ENABLE, 1);
3838 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3839 				   ELEMENT_SIZE, 1);
3840 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3841 				   INDEX_STRIDE, 3);
3842 	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3843 
3844 	mutex_lock(&adev->srbm_mutex);
3845 	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3846 		vi_srbm_select(adev, 0, 0, 0, i);
3847 		/* CP and shaders */
3848 		if (i == 0) {
3849 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3850 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3851 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3852 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3853 			WREG32(mmSH_MEM_CONFIG, tmp);
3854 			WREG32(mmSH_MEM_BASES, 0);
3855 		} else {
3856 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3857 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3858 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3859 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3860 			WREG32(mmSH_MEM_CONFIG, tmp);
3861 			tmp = adev->gmc.shared_aperture_start >> 48;
3862 			WREG32(mmSH_MEM_BASES, tmp);
3863 		}
3864 
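		/*
		 * A base (1) above the limit (0) leaves the APE1 aperture
		 * effectively disabled; gfx_v8_0_init_compute_vmid() uses
		 * the same pair for the compute VMIDs.
		 */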
3865 		WREG32(mmSH_MEM_APE1_BASE, 1);
3866 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3867 	}
3868 	vi_srbm_select(adev, 0, 0, 0, 0);
3869 	mutex_unlock(&adev->srbm_mutex);
3870 
3871 	gfx_v8_0_init_compute_vmid(adev);
3872 
3873 	mutex_lock(&adev->grbm_idx_mutex);
3874 	/*
3875 	 * make sure that the following register writes are broadcast
3876 	 * to all the shaders
3877 	 */
3878 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3879 
3880 	WREG32(mmPA_SC_FIFO_SIZE,
3881 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3882 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3883 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3884 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3885 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3886 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3887 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3888 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3889 
3890 	tmp = RREG32(mmSPI_ARB_PRIORITY);
3891 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3892 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3893 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3894 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3895 	WREG32(mmSPI_ARB_PRIORITY, tmp);
3896 
3897 	mutex_unlock(&adev->grbm_idx_mutex);
3898 
3900 }
3901 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
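/*
 * Poll (in 1 usec steps, bounded by adev->usec_timeout) until the RLC
 * serdes masters report idle: first the per-CU masters on each SE/SH,
 * then the non-CU masters (SE/GC/TC0/TC1) in broadcast mode.
 */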
3902 {
3903 	u32 i, j, k;
3904 	u32 mask;
3905 
3906 	mutex_lock(&adev->grbm_idx_mutex);
3907 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3908 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3909 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3910 			for (k = 0; k < adev->usec_timeout; k++) {
3911 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3912 					break;
3913 				udelay(1);
3914 			}
3915 			if (k == adev->usec_timeout) {
3916 				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3917 						      0xffffffff, 0xffffffff);
3918 				mutex_unlock(&adev->grbm_idx_mutex);
3919 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3920 					 i, j);
3921 				return;
3922 			}
3923 		}
3924 	}
3925 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3926 	mutex_unlock(&adev->grbm_idx_mutex);
3927 
3928 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3929 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3930 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3931 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3932 	for (k = 0; k < adev->usec_timeout; k++) {
3933 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3934 			break;
3935 		udelay(1);
3936 	}
3937 }
3938 
3939 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3940 					       bool enable)
3941 {
3942 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3943 
3944 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3945 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3946 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3947 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3948 
3949 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3950 }
3951 
3952 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
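/*
 * Point the RLC at the clear-state indirect buffer (CSIB); the low two
 * address bits are masked off, so the buffer is assumed to be at least
 * dword-aligned.
 */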
3953 {
3954 	/* csib */
3955 	WREG32(mmRLC_CSIB_ADDR_HI,
3956 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3957 	WREG32(mmRLC_CSIB_ADDR_LO,
3958 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3959 	WREG32(mmRLC_CSIB_LENGTH,
3960 			adev->gfx.rlc.clear_state_size);
3961 }
3962 
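/*
 * Compact the RLC indirect register list in place.  Entries in
 * register_list_format are terminated by an 0xFFFFFFFF sentinel, and
 * the dword reached after skipping two appears to hold a register
 * index: each distinct index is gathered into unique_indices[] and
 * replaced by its position in that table, while ind_start_offsets[]
 * records where every entry begins.
 */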
3963 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3964 				int ind_offset,
3965 				int list_size,
3966 				int *unique_indices,
3967 				int *indices_count,
3968 				int max_indices,
3969 				int *ind_start_offsets,
3970 				int *offset_count,
3971 				int max_offset)
3972 {
3973 	int indices;
3974 	bool new_entry = true;
3975 
3976 	for (; ind_offset < list_size; ind_offset++) {
3977 
3978 		if (new_entry) {
3979 			new_entry = false;
3980 			ind_start_offsets[*offset_count] = ind_offset;
3981 			*offset_count = *offset_count + 1;
3982 			BUG_ON(*offset_count >= max_offset);
3983 		}
3984 
3985 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3986 			new_entry = true;
3987 			continue;
3988 		}
3989 
3990 		ind_offset += 2;
3991 
3992 		/* look for the matching index */
3993 		for (indices = 0;
3994 			indices < *indices_count;
3995 			indices++) {
3996 			if (unique_indices[indices] ==
3997 				register_list_format[ind_offset])
3998 				break;
3999 		}
4000 
4001 		if (indices >= *indices_count) {
4002 			unique_indices[*indices_count] =
4003 				register_list_format[ind_offset];
4004 			indices = *indices_count;
4005 			*indices_count = *indices_count + 1;
4006 			BUG_ON(*indices_count >= max_indices);
4007 		}
4008 
4009 		register_list_format[ind_offset] = indices;
4010 	}
4011 }
4012 
4013 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4014 {
4015 	int i, temp, data;
4016 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4017 	int indices_count = 0;
4018 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4019 	int offset_count = 0;
4020 
4021 	int list_size;
4022 	unsigned int *register_list_format =
4023 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4024 	if (!register_list_format)
4025 		return -ENOMEM;
4026 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4027 			adev->gfx.rlc.reg_list_format_size_bytes);
4028 
4029 	gfx_v8_0_parse_ind_reg_list(register_list_format,
4030 				RLC_FormatDirectRegListLength,
4031 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4032 				unique_indices,
4033 				&indices_count,
4034 				ARRAY_SIZE(unique_indices),
4035 				indirect_start_offsets,
4036 				&offset_count,
4037 				ARRAY_SIZE(indirect_start_offsets));
4038 
4039 	/* save and restore list */
4040 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4041 
4042 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4043 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4044 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4045 
4046 	/* indirect list */
4047 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4048 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4049 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4050 
4051 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4052 	list_size = list_size >> 1;
4053 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4054 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4055 
4056 	/* starting offsets */
4057 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
4058 		adev->gfx.rlc.starting_offsets_start);
4059 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4060 		WREG32(mmRLC_GPM_SCRATCH_DATA,
4061 				indirect_start_offsets[i]);
4062 
4063 	/* unique indices */
4064 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4065 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4066 	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4067 		if (unique_indices[i] != 0) {
4068 			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4069 			WREG32(data + i, unique_indices[i] >> 20);
4070 		}
4071 	}
4072 	kfree(register_list_format);
4073 
4074 	return 0;
4075 }
4076 
4077 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4078 {
4079 	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4080 }
4081 
4082 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4083 {
4084 	uint32_t data;
4085 
4086 	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4087 
4088 	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4089 	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4090 	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4091 	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4092 	WREG32(mmRLC_PG_DELAY, data);
4093 
4094 	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4095 	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4097 }
4098 
4099 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4100 						bool enable)
4101 {
4102 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4103 }
4104 
4105 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4106 						  bool enable)
4107 {
4108 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4109 }
4110 
4111 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4112 {
4113 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4114 }
4115 
4116 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4117 {
4118 	if ((adev->asic_type == CHIP_CARRIZO) ||
4119 	    (adev->asic_type == CHIP_STONEY)) {
4120 		gfx_v8_0_init_csb(adev);
4121 		gfx_v8_0_init_save_restore_list(adev);
4122 		gfx_v8_0_enable_save_restore_machine(adev);
4123 		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4124 		gfx_v8_0_init_power_gating(adev);
4125 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4126 	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4127 		   (adev->asic_type == CHIP_POLARIS12) ||
4128 		   (adev->asic_type == CHIP_VEGAM)) {
4129 		gfx_v8_0_init_csb(adev);
4130 		gfx_v8_0_init_save_restore_list(adev);
4131 		gfx_v8_0_enable_save_restore_machine(adev);
4132 		gfx_v8_0_init_power_gating(adev);
4133 	}
4135 }
4136 
4137 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4138 {
4139 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4140 
4141 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4142 	gfx_v8_0_wait_for_rlc_serdes(adev);
4143 }
4144 
4145 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4146 {
4147 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4148 	udelay(50);
4149 
4150 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4151 	udelay(50);
4152 }
4153 
4154 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4155 {
4156 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4157 
4158 	/* on APUs (e.g. Carrizo), the CP interrupt is enabled only after the CP is initialized */
4159 	if (!(adev->flags & AMD_IS_APU))
4160 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4161 
4162 	udelay(50);
4163 }
4164 
4165 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4166 {
4167 	const struct rlc_firmware_header_v2_0 *hdr;
4168 	const __le32 *fw_data;
4169 	unsigned i, fw_size;
4170 
4171 	if (!adev->gfx.rlc_fw)
4172 		return -EINVAL;
4173 
4174 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4175 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4176 
4177 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4178 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4179 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4180 
4181 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4182 	for (i = 0; i < fw_size; i++)
4183 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4184 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4185 
4186 	return 0;
4187 }
4188 
4189 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4190 {
4191 	int r;
4192 	u32 tmp;
4193 
4194 	gfx_v8_0_rlc_stop(adev);
4195 
4196 	/* disable CG */
4197 	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4198 	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4199 		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4200 	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4201 	if (adev->asic_type == CHIP_POLARIS11 ||
4202 	    adev->asic_type == CHIP_POLARIS10 ||
4203 	    adev->asic_type == CHIP_POLARIS12 ||
4204 	    adev->asic_type == CHIP_VEGAM) {
4205 		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4206 		tmp &= ~0x3;
4207 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4208 	}
4209 
4210 	/* disable PG */
4211 	WREG32(mmRLC_PG_CNTL, 0);
4212 
4213 	gfx_v8_0_rlc_reset(adev);
4214 	gfx_v8_0_init_pg(adev);
4216 
4217 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4218 		/* legacy rlc firmware loading */
4219 		r = gfx_v8_0_rlc_load_microcode(adev);
4220 		if (r)
4221 			return r;
4222 	}
4223 
4224 	gfx_v8_0_rlc_start(adev);
4225 
4226 	return 0;
4227 }
4228 
4229 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4230 {
4231 	int i;
4232 	u32 tmp = RREG32(mmCP_ME_CNTL);
4233 
4234 	if (enable) {
4235 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4236 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4237 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4238 	} else {
4239 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4240 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4241 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4242 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4243 			adev->gfx.gfx_ring[i].ready = false;
4244 	}
4245 	WREG32(mmCP_ME_CNTL, tmp);
4246 	udelay(50);
4247 }
4248 
4249 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4250 {
4251 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4252 	const struct gfx_firmware_header_v1_0 *ce_hdr;
4253 	const struct gfx_firmware_header_v1_0 *me_hdr;
4254 	const __le32 *fw_data;
4255 	unsigned i, fw_size;
4256 
4257 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4258 		return -EINVAL;
4259 
4260 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4261 		adev->gfx.pfp_fw->data;
4262 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4263 		adev->gfx.ce_fw->data;
4264 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4265 		adev->gfx.me_fw->data;
4266 
4267 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4268 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4269 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4270 
4271 	gfx_v8_0_cp_gfx_enable(adev, false);
4272 
4273 	/* PFP */
4274 	fw_data = (const __le32 *)
4275 		(adev->gfx.pfp_fw->data +
4276 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4277 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4278 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4279 	for (i = 0; i < fw_size; i++)
4280 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4281 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4282 
4283 	/* CE */
4284 	fw_data = (const __le32 *)
4285 		(adev->gfx.ce_fw->data +
4286 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4287 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4288 	WREG32(mmCP_CE_UCODE_ADDR, 0);
4289 	for (i = 0; i < fw_size; i++)
4290 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4291 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4292 
4293 	/* ME */
4294 	fw_data = (const __le32 *)
4295 		(adev->gfx.me_fw->data +
4296 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4297 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4298 	WREG32(mmCP_ME_RAM_WADDR, 0);
4299 	for (i = 0; i < fw_size; i++)
4300 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4301 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4302 
4303 	return 0;
4304 }
4305 
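/*
 * Size of the clear-state indirect buffer in dwords: the fixed-cost packets
 * (preamble begin/end, context control, clear state, raster config) plus
 * 2 + reg_count dwords for every SET_CONTEXT_REG extent in vi_cs_data.
 */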
4306 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4307 {
4308 	u32 count = 0;
4309 	const struct cs_section_def *sect = NULL;
4310 	const struct cs_extent_def *ext = NULL;
4311 
4312 	/* begin clear state */
4313 	count += 2;
4314 	/* context control state */
4315 	count += 3;
4316 
4317 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4318 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4319 			if (sect->id == SECT_CONTEXT)
4320 				count += 2 + ext->reg_count;
4321 			else
4322 				return 0;
4323 		}
4324 	}
4325 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4326 	count += 4;
4327 	/* end clear state */
4328 	count += 2;
4329 	/* clear state */
4330 	count += 2;
4331 
4332 	return count;
4333 }
4334 
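/*
 * Bring up the gfx CP and emit the initial clear-state sequence on ring 0:
 * the same packets gfx_v8_0_get_csb_size() accounts for, followed by a
 * SET_BASE that splits the CE/DE partitions (0x8000 each, per the values
 * written below).
 */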
4335 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4336 {
4337 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4338 	const struct cs_section_def *sect = NULL;
4339 	const struct cs_extent_def *ext = NULL;
4340 	int r, i;
4341 
4342 	/* init the CP */
4343 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4344 	WREG32(mmCP_ENDIAN_SWAP, 0);
4345 	WREG32(mmCP_DEVICE_ID, 1);
4346 
4347 	gfx_v8_0_cp_gfx_enable(adev, true);
4348 
4349 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4350 	if (r) {
4351 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4352 		return r;
4353 	}
4354 
4355 	/* clear state buffer */
4356 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4357 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4358 
4359 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4360 	amdgpu_ring_write(ring, 0x80000000);
4361 	amdgpu_ring_write(ring, 0x80000000);
4362 
4363 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4364 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4365 			if (sect->id == SECT_CONTEXT) {
4366 				amdgpu_ring_write(ring,
4367 				       PACKET3(PACKET3_SET_CONTEXT_REG,
4368 					       ext->reg_count));
4369 				amdgpu_ring_write(ring,
4370 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4371 				for (i = 0; i < ext->reg_count; i++)
4372 					amdgpu_ring_write(ring, ext->extent[i]);
4373 			}
4374 		}
4375 	}
4376 
4377 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4378 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4379 	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4380 	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4381 
4382 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4383 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4384 
4385 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4386 	amdgpu_ring_write(ring, 0);
4387 
4388 	/* init the CE partitions */
4389 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4390 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4391 	amdgpu_ring_write(ring, 0x8000);
4392 	amdgpu_ring_write(ring, 0x8000);
4393 
4394 	amdgpu_ring_commit(ring);
4395 
4396 	return 0;
4397 }

4398 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4399 {
4400 	u32 tmp;
4401 	/* no gfx doorbells on iceland */
4402 	if (adev->asic_type == CHIP_TOPAZ)
4403 		return;
4404 
4405 	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4406 
4407 	if (ring->use_doorbell) {
4408 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4409 				DOORBELL_OFFSET, ring->doorbell_index);
4410 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4411 						DOORBELL_HIT, 0);
4412 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4413 					    DOORBELL_EN, 1);
4414 	} else {
4415 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4416 	}
4417 
4418 	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4419 
4420 	if (adev->flags & AMD_IS_APU)
4421 		return;
4422 
4423 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4424 					DOORBELL_RANGE_LOWER,
4425 					AMDGPU_DOORBELL_GFX_RING0);
4426 	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4427 
4428 	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4429 		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4430 }
4431 
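/*
 * (Re)program gfx ring buffer 0: size and block size (RB_BUFSZ appears to
 * encode log2 of the ring size in 8-byte units, given the ring_size / 8
 * below), rptr/wptr writeback addresses, ring base and the doorbell
 * aperture, then start the ring and sanity-test it.
 */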
4432 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4433 {
4434 	struct amdgpu_ring *ring;
4435 	u32 tmp;
4436 	u32 rb_bufsz;
4437 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4438 	int r;
4439 
4440 	/* Set the write pointer delay */
4441 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4442 
4443 	/* set the RB to use vmid 0 */
4444 	WREG32(mmCP_RB_VMID, 0);
4445 
4446 	/* Set ring buffer size */
4447 	ring = &adev->gfx.gfx_ring[0];
4448 	rb_bufsz = order_base_2(ring->ring_size / 8);
4449 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4450 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4451 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4452 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4453 #ifdef __BIG_ENDIAN
4454 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4455 #endif
4456 	WREG32(mmCP_RB0_CNTL, tmp);
4457 
4458 	/* Initialize the ring buffer's read and write pointers */
4459 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4460 	ring->wptr = 0;
4461 	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4462 
4463 	/* set the wb address whether it's enabled or not */
4464 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4465 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4466 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4467 
4468 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4469 	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4470 	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4471 	mdelay(1);
4472 	WREG32(mmCP_RB0_CNTL, tmp);
4473 
4474 	rb_addr = ring->gpu_addr >> 8;
4475 	WREG32(mmCP_RB0_BASE, rb_addr);
4476 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4477 
4478 	gfx_v8_0_set_cpg_door_bell(adev, ring);
4479 	/* start the ring */
4480 	amdgpu_ring_clear_ring(ring);
4481 	gfx_v8_0_cp_gfx_start(adev);
4482 	ring->ready = true;
4483 	r = amdgpu_ring_test_ring(ring);
4484 	if (r)
4485 		ring->ready = false;
4486 
4487 	return r;
4488 }
4489 
4490 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4491 {
4492 	int i;
4493 
4494 	if (enable) {
4495 		WREG32(mmCP_MEC_CNTL, 0);
4496 	} else {
4497 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4498 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4499 			adev->gfx.compute_ring[i].ready = false;
4500 		adev->gfx.kiq.ring.ready = false;
4501 	}
4502 	udelay(50);
4503 }
4504 
4505 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4506 {
4507 	const struct gfx_firmware_header_v1_0 *mec_hdr;
4508 	const __le32 *fw_data;
4509 	unsigned i, fw_size;
4510 
4511 	if (!adev->gfx.mec_fw)
4512 		return -EINVAL;
4513 
4514 	gfx_v8_0_cp_compute_enable(adev, false);
4515 
4516 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4517 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4518 
4519 	fw_data = (const __le32 *)
4520 		(adev->gfx.mec_fw->data +
4521 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4522 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4523 
4524 	/* MEC1 */
4525 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4526 	for (i = 0; i < fw_size; i++)
4527 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4528 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4529 
4530 	/* Loading MEC2 firmware is only necessary if MEC2 should run
	 * different microcode than MEC1. */
4531 	if (adev->gfx.mec2_fw) {
4532 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4533 
4534 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4535 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4536 
4537 		fw_data = (const __le32 *)
4538 			(adev->gfx.mec2_fw->data +
4539 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4540 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4541 
4542 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4543 		for (i = 0; i < fw_size; i++)
4544 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4545 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4546 	}
4547 
4548 	return 0;
4549 }
4550 
4551 /* KIQ functions */
4552 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4553 {
4554 	uint32_t tmp;
4555 	struct amdgpu_device *adev = ring->adev;
4556 
4557 	/* tell RLC which is KIQ queue */
4558 	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4559 	tmp &= 0xffffff00;
4560 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4561 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4562 	tmp |= 0x80;
4563 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4564 }
4565 
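/*
 * Ask the KIQ to map all user compute queues: one SET_RESOURCES packet
 * carrying the queue bitmap, then one MAP_QUEUES per KCQ.  A trailing
 * SET_UCONFIG_REG write of 0xDEADBEEF to a scratch register doubles as a
 * completion marker, which the polling loop at the end waits on.
 */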
4566 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4567 {
4568 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4569 	uint32_t scratch, tmp = 0;
4570 	uint64_t queue_mask = 0;
4571 	int r, i;
4572 
4573 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4574 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4575 			continue;
4576 
4577 		/* This situation may be hit in the future if a new HW
4578 		 * generation exposes more than 64 queues. If so, the
4579 		 * definition of queue_mask needs updating.
		 */
4580 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4581 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4582 			break;
4583 		}
4584 
4585 		queue_mask |= (1ull << i);
4586 	}
4587 
4588 	r = amdgpu_gfx_scratch_get(adev, &scratch);
4589 	if (r) {
4590 		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4591 		return r;
4592 	}
4593 	WREG32(scratch, 0xCAFEDEAD);
4594 
4595 	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4596 	if (r) {
4597 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4598 		amdgpu_gfx_scratch_free(adev, scratch);
4599 		return r;
4600 	}
4601 	/* set resources */
4602 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4603 	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4604 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4605 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4606 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4607 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4608 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4609 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4610 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4611 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4612 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4613 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4614 
4615 		/* map queues */
4616 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4617 		/* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
4618 		amdgpu_ring_write(kiq_ring,
4619 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4620 		amdgpu_ring_write(kiq_ring,
4621 				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4622 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4623 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4624 				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4625 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4626 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4627 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4628 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4629 	}
4630 	/* write to scratch for completion */
4631 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4632 	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4633 	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4634 	amdgpu_ring_commit(kiq_ring);
4635 
4636 	for (i = 0; i < adev->usec_timeout; i++) {
4637 		tmp = RREG32(scratch);
4638 		if (tmp == 0xDEADBEEF)
4639 			break;
4640 		DRM_UDELAY(1);
4641 	}
4642 	if (i >= adev->usec_timeout) {
4643 		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4644 			  scratch, tmp);
4645 		r = -EINVAL;
4646 	}
4647 	amdgpu_gfx_scratch_free(adev, scratch);
4648 
4649 	return r;
4650 }
4651 
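/*
 * Ask the currently selected HQD to dequeue (req selects the dequeue mode
 * written to CP_HQD_DEQUEUE_REQUEST) and wait up to usec_timeout for
 * CP_HQD_ACTIVE to clear before zeroing the queue pointers.
 */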
4652 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4653 {
4654 	int i, r = 0;
4655 
4656 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4657 		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4658 		for (i = 0; i < adev->usec_timeout; i++) {
4659 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4660 				break;
4661 			udelay(1);
4662 		}
4663 		if (i == adev->usec_timeout)
4664 			r = -ETIMEDOUT;
4665 	}
4666 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4667 	WREG32(mmCP_HQD_PQ_RPTR, 0);
4668 	WREG32(mmCP_HQD_PQ_WPTR, 0);
4669 
4670 	return r;
4671 }
4672 
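/*
 * Fill the memory queue descriptor (MQD) from which the CP loads the
 * hardware queue state: EOP buffer, MQD base, ring base/size, writeback
 * addresses, doorbell control, plus assorted defaults snapshotted from the
 * current HQD registers.  cp_hqd_active = 1 at the end marks the queue as
 * ready to go live once the MQD is committed.
 */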
4673 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4674 {
4675 	struct amdgpu_device *adev = ring->adev;
4676 	struct vi_mqd *mqd = ring->mqd_ptr;
4677 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4678 	uint32_t tmp;
4679 
4680 	mqd->header = 0xC0310800;
4681 	mqd->compute_pipelinestat_enable = 0x00000001;
4682 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4683 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4684 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4685 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4686 	mqd->compute_misc_reserved = 0x00000003;
4687 	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4688 						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4689 	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4690 						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4691 	eop_base_addr = ring->eop_gpu_addr >> 8;
4692 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4693 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4694 
4695 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4696 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4697 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4698 			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
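	/* e.g. GFX8_MEC_HPD_SIZE = 2048 bytes = 512 dwords, so EOP_SIZE =
	 * order_base_2(512) - 1 = 8 and the EOP ring holds 2^(8+1) = 512
	 * dwords.
	 */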
4699 
4700 	mqd->cp_hqd_eop_control = tmp;
4701 
4702 	/* enable doorbell? */
4703 	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4704 			    CP_HQD_PQ_DOORBELL_CONTROL,
4705 			    DOORBELL_EN,
4706 			    ring->use_doorbell ? 1 : 0);
4707 
4708 	mqd->cp_hqd_pq_doorbell_control = tmp;
4709 
4710 	/* set the pointer to the MQD */
4711 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4712 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4713 
4714 	/* set MQD vmid to 0 */
4715 	tmp = RREG32(mmCP_MQD_CONTROL);
4716 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4717 	mqd->cp_mqd_control = tmp;
4718 
4719 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4720 	hqd_gpu_addr = ring->gpu_addr >> 8;
4721 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4722 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4723 
4724 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4725 	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4726 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4727 			    (order_base_2(ring->ring_size / 4) - 1));
4728 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4729 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4730 #ifdef __BIG_ENDIAN
4731 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4732 #endif
4733 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4734 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4735 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4736 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4737 	mqd->cp_hqd_pq_control = tmp;
4738 
4739 	/* set the wb address whether it's enabled or not */
4740 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4741 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4742 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4743 		upper_32_bits(wb_gpu_addr) & 0xffff;
4744 
4745 	/* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4746 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4747 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4748 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4749 
4750 	tmp = 0;
4751 	/* enable the doorbell if requested */
4752 	if (ring->use_doorbell) {
4753 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4754 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4755 				DOORBELL_OFFSET, ring->doorbell_index);
4756 
4757 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4758 					 DOORBELL_EN, 1);
4759 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4760 					 DOORBELL_SOURCE, 0);
4761 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4762 					 DOORBELL_HIT, 0);
4763 	}
4764 
4765 	mqd->cp_hqd_pq_doorbell_control = tmp;
4766 
4767 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4768 	ring->wptr = 0;
4769 	mqd->cp_hqd_pq_wptr = ring->wptr;
4770 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4771 
4772 	/* set the vmid for the queue */
4773 	mqd->cp_hqd_vmid = 0;
4774 
4775 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4776 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4777 	mqd->cp_hqd_persistent_state = tmp;
4778 
4779 	/* set MTYPE */
4780 	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4781 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4782 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4783 	mqd->cp_hqd_ib_control = tmp;
4784 
4785 	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4786 	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4787 	mqd->cp_hqd_iq_timer = tmp;
4788 
4789 	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4790 	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4791 	mqd->cp_hqd_ctx_save_control = tmp;
4792 
4793 	/* defaults */
4794 	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4795 	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4796 	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4797 	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4798 	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4799 	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4800 	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4801 	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4802 	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4803 	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4804 	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4805 	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4806 	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4807 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4808 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4809 
4810 	/* activate the queue */
4811 	mqd->cp_hqd_active = 1;
4812 
4813 	return 0;
4814 }
4815 
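/*
 * Write a prepared MQD into the currently selected HQD.  The HQD registers
 * are laid out contiguously from mmCP_MQD_BASE_ADDR and the struct vi_mqd
 * fields mirror that layout from cp_mqd_base_addr_lo onward, so each
 * register is programmed from mqd_data[reg - base].  CP_HQD_ACTIVE is
 * written last so the queue only goes live once the rest of the state is
 * in place.
 */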
4816 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4817 			struct vi_mqd *mqd)
4818 {
4819 	uint32_t mqd_reg;
4820 	uint32_t *mqd_data;
4821 
4822 	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4823 	mqd_data = &mqd->cp_mqd_base_addr_lo;
4824 
4825 	/* disable wptr polling */
4826 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4827 
4828 	/* program all HQD registers */
4829 	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4830 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4831 
4832 	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4833 	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4834 	 * on ASICs that do not support context-save.
4835 	 * EOP writes/reads can start anywhere in the ring.
4836 	 */
4837 	if (adev->asic_type != CHIP_TONGA) {
4838 		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4839 		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4840 		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4841 	}
4842 
4843 	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4844 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4845 
4846 	/* activate the HQD */
4847 	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4848 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4849 
4850 	return 0;
4851 }
4852 
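/*
 * KIQ bring-up: on a fresh init, build the MQD under srbm_mutex and back it
 * up; on GPU reset, restore the MQD from the backup and only re-commit it,
 * so the queue resumes with its pre-reset configuration.
 */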
4853 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4854 {
4855 	struct amdgpu_device *adev = ring->adev;
4856 	struct vi_mqd *mqd = ring->mqd_ptr;
4857 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4858 
4859 	gfx_v8_0_kiq_setting(ring);
4860 
4861 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
4862 		/* reset MQD to a clean status */
4863 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4864 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4865 
4866 		/* reset ring buffer */
4867 		ring->wptr = 0;
4868 		amdgpu_ring_clear_ring(ring);
4869 		mutex_lock(&adev->srbm_mutex);
4870 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4871 		gfx_v8_0_mqd_commit(adev, mqd);
4872 		vi_srbm_select(adev, 0, 0, 0, 0);
4873 		mutex_unlock(&adev->srbm_mutex);
4874 	} else {
4875 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4876 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4877 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4878 		mutex_lock(&adev->srbm_mutex);
4879 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4880 		gfx_v8_0_mqd_init(ring);
4881 		gfx_v8_0_mqd_commit(adev, mqd);
4882 		vi_srbm_select(adev, 0, 0, 0, 0);
4883 		mutex_unlock(&adev->srbm_mutex);
4884 
4885 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4886 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4887 	}
4888 
4889 	return 0;
4890 }
4891 
4892 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4893 {
4894 	struct amdgpu_device *adev = ring->adev;
4895 	struct vi_mqd *mqd = ring->mqd_ptr;
4896 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4897 
4898 	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4899 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4900 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4901 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4902 		mutex_lock(&adev->srbm_mutex);
4903 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4904 		gfx_v8_0_mqd_init(ring);
4905 		vi_srbm_select(adev, 0, 0, 0, 0);
4906 		mutex_unlock(&adev->srbm_mutex);
4907 
4908 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4909 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4910 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4911 		/* reset MQD to a clean status */
4912 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4913 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4914 		/* reset ring buffer */
4915 		ring->wptr = 0;
4916 		amdgpu_ring_clear_ring(ring);
4917 	} else {
4918 		amdgpu_ring_clear_ring(ring);
4919 	}
4920 	return 0;
4921 }
4922 
4923 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4924 {
4925 	if (adev->asic_type > CHIP_TONGA) {
4926 		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4927 		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4928 	}
4929 	/* enable doorbells */
4930 	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4931 }
4932 
4933 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4934 {
4935 	struct amdgpu_ring *ring = NULL;
4936 	int r = 0, i;
4937 
4938 	gfx_v8_0_cp_compute_enable(adev, true);
4939 
4940 	ring = &adev->gfx.kiq.ring;
4941 
4942 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4943 	if (unlikely(r != 0))
4944 		goto done;
4945 
4946 	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4947 	if (!r) {
4948 		r = gfx_v8_0_kiq_init_queue(ring);
4949 		amdgpu_bo_kunmap(ring->mqd_obj);
4950 		ring->mqd_ptr = NULL;
4951 	}
4952 	amdgpu_bo_unreserve(ring->mqd_obj);
4953 	if (r)
4954 		goto done;
4955 
4956 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4957 		ring = &adev->gfx.compute_ring[i];
4958 
4959 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4960 		if (unlikely(r != 0))
4961 			goto done;
4962 		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4963 		if (!r) {
4964 			r = gfx_v8_0_kcq_init_queue(ring);
4965 			amdgpu_bo_kunmap(ring->mqd_obj);
4966 			ring->mqd_ptr = NULL;
4967 		}
4968 		amdgpu_bo_unreserve(ring->mqd_obj);
4969 		if (r)
4970 			goto done;
4971 	}
4972 
4973 	gfx_v8_0_set_mec_doorbell_range(adev);
4974 
4975 	r = gfx_v8_0_kiq_kcq_enable(adev);
4976 	if (r)
4977 		goto done;
4978 
4979 	/* Test KIQ */
4980 	ring = &adev->gfx.kiq.ring;
4981 	ring->ready = true;
4982 	r = amdgpu_ring_test_ring(ring);
4983 	if (r) {
4984 		ring->ready = false;
4985 		goto done;
4986 	}
4987 
4988 	/* Test KCQs */
4989 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4990 		ring = &adev->gfx.compute_ring[i];
4991 		ring->ready = true;
4992 		r = amdgpu_ring_test_ring(ring);
4993 		if (r)
4994 			ring->ready = false;
4995 	}
4996 
4997 done:
4998 	return r;
4999 }
5000 
5001 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5002 {
5003 	int r;
5004 
5005 	if (!(adev->flags & AMD_IS_APU))
5006 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5007 
5008 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
5009 		/* legacy firmware loading */
5010 		r = gfx_v8_0_cp_gfx_load_microcode(adev);
5011 		if (r)
5012 			return r;
5013 
5014 		r = gfx_v8_0_cp_compute_load_microcode(adev);
5015 		if (r)
5016 			return r;
5017 	}
5018 
5019 	r = gfx_v8_0_cp_gfx_resume(adev);
5020 	if (r)
5021 		return r;
5022 
5023 	r = gfx_v8_0_kiq_resume(adev);
5024 	if (r)
5025 		return r;
5026 
5027 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5028 
5029 	return 0;
5030 }
5031 
5032 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5033 {
5034 	gfx_v8_0_cp_gfx_enable(adev, enable);
5035 	gfx_v8_0_cp_compute_enable(adev, enable);
5036 }
5037 
5038 static int gfx_v8_0_hw_init(void *handle)
5039 {
5040 	int r;
5041 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5042 
5043 	gfx_v8_0_init_golden_registers(adev);
5044 	gfx_v8_0_gpu_init(adev);
5045 
5046 	r = gfx_v8_0_rlc_resume(adev);
5047 	if (r)
5048 		return r;
5049 
5050 	r = gfx_v8_0_cp_resume(adev);
5051 
5052 	return r;
5053 }
5054 
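/*
 * Tear a KCQ down through the KIQ: a single UNMAP_QUEUES with
 * ACTION=RESET_QUEUES targeting the ring's doorbell, completed with the
 * same scratch-register 0xDEADBEEF handshake used when enabling the queues.
 */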
5055 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
5056 {
5057 	struct amdgpu_device *adev = kiq_ring->adev;
5058 	uint32_t scratch, tmp = 0;
5059 	int r, i;
5060 
5061 	r = amdgpu_gfx_scratch_get(adev, &scratch);
5062 	if (r) {
5063 		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5064 		return r;
5065 	}
5066 	WREG32(scratch, 0xCAFEDEAD);
5067 
5068 	r = amdgpu_ring_alloc(kiq_ring, 10);
5069 	if (r) {
5070 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5071 		amdgpu_gfx_scratch_free(adev, scratch);
5072 		return r;
5073 	}
5074 
5075 	/* unmap queues */
5076 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5077 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5078 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5079 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5080 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5081 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5082 	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5083 	amdgpu_ring_write(kiq_ring, 0);
5084 	amdgpu_ring_write(kiq_ring, 0);
5085 	amdgpu_ring_write(kiq_ring, 0);
5086 	/* write to scratch for completion */
5087 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5088 	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5089 	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5090 	amdgpu_ring_commit(kiq_ring);
5091 
5092 	for (i = 0; i < adev->usec_timeout; i++) {
5093 		tmp = RREG32(scratch);
5094 		if (tmp == 0xDEADBEEF)
5095 			break;
5096 		DRM_UDELAY(1);
5097 	}
5098 	if (i >= adev->usec_timeout) {
5099 		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5100 		r = -EINVAL;
5101 	}
5102 	amdgpu_gfx_scratch_free(adev, scratch);
5103 	return r;
5104 }
5105 
5106 static int gfx_v8_0_hw_fini(void *handle)
5107 {
5108 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5109 	int i;
5110 
5111 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5112 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5113 
5114 	/* disable KCQ to avoid CPC touch memory not valid anymore */
5115 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5116 		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5117 
5118 	if (amdgpu_sriov_vf(adev)) {
5119 		pr_debug("For SR-IOV client, nothing more to do here.\n");
5120 		return 0;
5121 	}
5122 	gfx_v8_0_cp_enable(adev, false);
5123 	gfx_v8_0_rlc_stop(adev);
5124 
5125 	amdgpu_device_ip_set_powergating_state(adev,
5126 					       AMD_IP_BLOCK_TYPE_GFX,
5127 					       AMD_PG_STATE_UNGATE);
5128 
5129 	return 0;
5130 }
5131 
5132 static int gfx_v8_0_suspend(void *handle)
5133 {
5134 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5135 	adev->gfx.in_suspend = true;
5136 	return gfx_v8_0_hw_fini(adev);
5137 }
5138 
5139 static int gfx_v8_0_resume(void *handle)
5140 {
5141 	int r;
5142 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5143 
5144 	r = gfx_v8_0_hw_init(adev);
5145 	adev->gfx.in_suspend = false;
5146 	return r;
5147 }
5148 
5149 static bool gfx_v8_0_is_idle(void *handle)
5150 {
5151 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5152 
5153 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5154 		return false;
5155 	else
5156 		return true;
5157 }
5158 
5159 static int gfx_v8_0_wait_for_idle(void *handle)
5160 {
5161 	unsigned i;
5162 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5163 
5164 	for (i = 0; i < adev->usec_timeout; i++) {
5165 		if (gfx_v8_0_is_idle(handle))
5166 			return 0;
5167 
5168 		udelay(1);
5169 	}
5170 	return -ETIMEDOUT;
5171 }
5172 
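/*
 * Derive soft-reset masks from the busy bits in GRBM_STATUS, GRBM_STATUS2
 * and SRBM_STATUS, and stash them in adev->gfx for the pre/soft/post reset
 * callbacks below.  Returns true if any reset is needed.
 */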
5173 static bool gfx_v8_0_check_soft_reset(void *handle)
5174 {
5175 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5176 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5177 	u32 tmp;
5178 
5179 	/* GRBM_STATUS */
5180 	tmp = RREG32(mmGRBM_STATUS);
5181 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5182 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5183 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5184 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5185 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5186 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5187 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5188 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5189 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5190 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5191 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5192 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5193 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5194 	}
5195 
5196 	/* GRBM_STATUS2 */
5197 	tmp = RREG32(mmGRBM_STATUS2);
5198 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5199 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5200 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5201 
5202 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5203 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5204 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5205 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5206 						SOFT_RESET_CPF, 1);
5207 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5208 						SOFT_RESET_CPC, 1);
5209 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5210 						SOFT_RESET_CPG, 1);
5211 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5212 						SOFT_RESET_GRBM, 1);
5213 	}
5214 
5215 	/* SRBM_STATUS */
5216 	tmp = RREG32(mmSRBM_STATUS);
5217 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5218 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5219 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5220 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5221 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5222 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5223 
5224 	if (grbm_soft_reset || srbm_soft_reset) {
5225 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5226 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5227 		return true;
5228 	} else {
5229 		adev->gfx.grbm_soft_reset = 0;
5230 		adev->gfx.srbm_soft_reset = 0;
5231 		return false;
5232 	}
5233 }
5234 
5235 static int gfx_v8_0_pre_soft_reset(void *handle)
5236 {
5237 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5238 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5239 
5240 	if ((!adev->gfx.grbm_soft_reset) &&
5241 	    (!adev->gfx.srbm_soft_reset))
5242 		return 0;
5243 
5244 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5245 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5246 
5247 	/* stop the rlc */
5248 	gfx_v8_0_rlc_stop(adev);
5249 
5250 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5251 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5252 		/* Disable GFX parsing/prefetching */
5253 		gfx_v8_0_cp_gfx_enable(adev, false);
5254 
5255 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5256 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5257 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5258 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5259 		int i;
5260 
5261 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5262 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5263 
5264 			mutex_lock(&adev->srbm_mutex);
5265 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5266 			gfx_v8_0_deactivate_hqd(adev, 2);
5267 			vi_srbm_select(adev, 0, 0, 0, 0);
5268 			mutex_unlock(&adev->srbm_mutex);
5269 		}
5270 		/* Disable MEC parsing/prefetching */
5271 		gfx_v8_0_cp_compute_enable(adev, false);
5272 	}
5273 
5274 	return 0;
5275 }
5276 
5277 static int gfx_v8_0_soft_reset(void *handle)
5278 {
5279 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5280 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5281 	u32 tmp;
5282 
5283 	if ((!adev->gfx.grbm_soft_reset) &&
5284 	    (!adev->gfx.srbm_soft_reset))
5285 		return 0;
5286 
5287 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5288 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5289 
5290 	if (grbm_soft_reset || srbm_soft_reset) {
5291 		tmp = RREG32(mmGMCON_DEBUG);
5292 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5293 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5294 		WREG32(mmGMCON_DEBUG, tmp);
5295 		udelay(50);
5296 	}
5297 
5298 	if (grbm_soft_reset) {
5299 		tmp = RREG32(mmGRBM_SOFT_RESET);
5300 		tmp |= grbm_soft_reset;
5301 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5302 		WREG32(mmGRBM_SOFT_RESET, tmp);
5303 		tmp = RREG32(mmGRBM_SOFT_RESET);
5304 
5305 		udelay(50);
5306 
5307 		tmp &= ~grbm_soft_reset;
5308 		WREG32(mmGRBM_SOFT_RESET, tmp);
5309 		tmp = RREG32(mmGRBM_SOFT_RESET);
5310 	}
5311 
5312 	if (srbm_soft_reset) {
5313 		tmp = RREG32(mmSRBM_SOFT_RESET);
5314 		tmp |= srbm_soft_reset;
5315 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5316 		WREG32(mmSRBM_SOFT_RESET, tmp);
5317 		tmp = RREG32(mmSRBM_SOFT_RESET);
5318 
5319 		udelay(50);
5320 
5321 		tmp &= ~srbm_soft_reset;
5322 		WREG32(mmSRBM_SOFT_RESET, tmp);
5323 		tmp = RREG32(mmSRBM_SOFT_RESET);
5324 	}
5325 
5326 	if (grbm_soft_reset || srbm_soft_reset) {
5327 		tmp = RREG32(mmGMCON_DEBUG);
5328 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5329 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5330 		WREG32(mmGMCON_DEBUG, tmp);
5331 	}
5332 
5333 	/* Wait a little for things to settle down */
5334 	udelay(50);
5335 
5336 	return 0;
5337 }
5338 
5339 static int gfx_v8_0_post_soft_reset(void *handle)
5340 {
5341 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5342 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5343 
5344 	if ((!adev->gfx.grbm_soft_reset) &&
5345 	    (!adev->gfx.srbm_soft_reset))
5346 		return 0;
5347 
5348 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5349 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5350 
5351 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5352 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5353 		gfx_v8_0_cp_gfx_resume(adev);
5354 
5355 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5356 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5357 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5358 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5359 		int i;
5360 
5361 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5362 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5363 
5364 			mutex_lock(&adev->srbm_mutex);
5365 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5366 			gfx_v8_0_deactivate_hqd(adev, 2);
5367 			vi_srbm_select(adev, 0, 0, 0, 0);
5368 			mutex_unlock(&adev->srbm_mutex);
5369 		}
5370 		gfx_v8_0_kiq_resume(adev);
5371 	}
5372 	gfx_v8_0_rlc_start(adev);
5373 
5374 	return 0;
5375 }
5376 
5377 /**
5378  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5379  *
5380  * @adev: amdgpu_device pointer
5381  *
5382  * Fetches a GPU clock counter snapshot.
5383  * Returns the 64-bit clock counter snapshot.
5384  */
5385 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5386 {
5387 	uint64_t clock;
5388 
5389 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5390 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5391 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5392 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5393 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5394 	return clock;
5395 }
5396 
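/*
 * Switch the per-VMID GDS/GWS/OA partitions with four WRITE_DATA packets.
 * The OA value builds a contiguous bitmask starting at oa_base: e.g.
 * oa_base = 1, oa_size = 2 gives (1 << 3) - (1 << 1) = 0b110, i.e. OA
 * units 1 and 2.
 */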
5397 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5398 					  uint32_t vmid,
5399 					  uint32_t gds_base, uint32_t gds_size,
5400 					  uint32_t gws_base, uint32_t gws_size,
5401 					  uint32_t oa_base, uint32_t oa_size)
5402 {
5403 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5404 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5405 
5406 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5407 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5408 
5409 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5410 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5411 
5412 	/* GDS Base */
5413 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5414 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5415 				WRITE_DATA_DST_SEL(0)));
5416 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5417 	amdgpu_ring_write(ring, 0);
5418 	amdgpu_ring_write(ring, gds_base);
5419 
5420 	/* GDS Size */
5421 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5422 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5423 				WRITE_DATA_DST_SEL(0)));
5424 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5425 	amdgpu_ring_write(ring, 0);
5426 	amdgpu_ring_write(ring, gds_size);
5427 
5428 	/* GWS */
5429 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5430 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5431 				WRITE_DATA_DST_SEL(0)));
5432 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5433 	amdgpu_ring_write(ring, 0);
5434 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5435 
5436 	/* OA */
5437 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5438 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5439 				WRITE_DATA_DST_SEL(0)));
5440 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5441 	amdgpu_ring_write(ring, 0);
5442 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5443 }
5444 
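/*
 * SQ wave state is read through the SQ_IND_INDEX/SQ_IND_DATA indirect
 * register pair: the index word selects the SIMD, wave (and, for GPRs, the
 * thread) plus a register offset; AUTO_INCR lets wave_read_regs() stream
 * consecutive registers out of SQ_IND_DATA.
 */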
5445 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5446 {
5447 	WREG32(mmSQ_IND_INDEX,
5448 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5449 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5450 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5451 		(SQ_IND_INDEX__FORCE_READ_MASK));
5452 	return RREG32(mmSQ_IND_DATA);
5453 }
5454 
5455 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5456 			   uint32_t wave, uint32_t thread,
5457 			   uint32_t regno, uint32_t num, uint32_t *out)
5458 {
5459 	WREG32(mmSQ_IND_INDEX,
5460 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5461 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5462 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5463 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5464 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5465 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5466 	while (num--)
5467 		*(out++) = RREG32(mmSQ_IND_DATA);
5468 }
5469 
5470 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5471 {
5472 	/* type 0 wave data */
5473 	dst[(*no_fields)++] = 0;
5474 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5475 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5476 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5477 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5478 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5479 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5480 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5481 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5482 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5483 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5484 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5485 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5486 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5487 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5488 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5489 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5490 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5491 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5492 }
5493 
5494 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5495 				     uint32_t wave, uint32_t start,
5496 				     uint32_t size, uint32_t *dst)
5497 {
5498 	wave_read_regs(
5499 		adev, simd, wave, 0,
5500 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5501 }
5502 
5503 
5504 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5505 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5506 	.select_se_sh = &gfx_v8_0_select_se_sh,
5507 	.read_wave_data = &gfx_v8_0_read_wave_data,
5508 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5509 	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5510 };
5511 
5512 static int gfx_v8_0_early_init(void *handle)
5513 {
5514 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5515 
5516 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5517 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5518 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5519 	gfx_v8_0_set_ring_funcs(adev);
5520 	gfx_v8_0_set_irq_funcs(adev);
5521 	gfx_v8_0_set_gds_init(adev);
5522 	gfx_v8_0_set_rlc_funcs(adev);
5523 
5524 	return 0;
5525 }
5526 
5527 static int gfx_v8_0_late_init(void *handle)
5528 {
5529 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5530 	int r;
5531 
5532 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5533 	if (r)
5534 		return r;
5535 
5536 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5537 	if (r)
5538 		return r;
5539 
5540 	/* requires IBs so do in late init after IB pool is initialized */
5541 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5542 	if (r)
5543 		return r;
5544 
5545 	amdgpu_device_ip_set_powergating_state(adev,
5546 					       AMD_IP_BLOCK_TYPE_GFX,
5547 					       AMD_PG_STATE_GATE);
5548 
5549 	return 0;
5550 }
5551 
5552 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5553 						       bool enable)
5554 {
5555 	if ((adev->asic_type == CHIP_POLARIS11) ||
5556 	    (adev->asic_type == CHIP_POLARIS12) ||
5557 	    (adev->asic_type == CHIP_VEGAM))
5558 		/* Send msg to SMU via Powerplay */
5559 		amdgpu_device_ip_set_powergating_state(adev,
5560 						       AMD_IP_BLOCK_TYPE_SMC,
5561 						       enable ?
5562 						       AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5563 
5564 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5565 }
5566 
5567 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5568 							bool enable)
5569 {
5570 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5571 }
5572 
5573 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5574 		bool enable)
5575 {
5576 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5577 }
5578 
5579 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5580 					  bool enable)
5581 {
5582 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5583 }
5584 
5585 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5586 						bool enable)
5587 {
5588 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5589 
5590 	/* Read any GFX register to wake up GFX. */
5591 	if (!enable)
5592 		RREG32(mmDB_RENDER_CONTROL);
5593 }
5594 
5595 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5596 					  bool enable)
5597 {
5598 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5599 		cz_enable_gfx_cg_power_gating(adev, true);
5600 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5601 			cz_enable_gfx_pipeline_power_gating(adev, true);
5602 	} else {
5603 		cz_enable_gfx_cg_power_gating(adev, false);
5604 		cz_enable_gfx_pipeline_power_gating(adev, false);
5605 	}
5606 }
5607 
5608 static int gfx_v8_0_set_powergating_state(void *handle,
5609 					  enum amd_powergating_state state)
5610 {
5611 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5612 	bool enable = (state == AMD_PG_STATE_GATE);
5613 
5614 	if (amdgpu_sriov_vf(adev))
5615 		return 0;
5616 
5617 	switch (adev->asic_type) {
5618 	case CHIP_CARRIZO:
5619 	case CHIP_STONEY:
5620 
5621 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5622 			cz_enable_sck_slow_down_on_power_up(adev, true);
5623 			cz_enable_sck_slow_down_on_power_down(adev, true);
5624 		} else {
5625 			cz_enable_sck_slow_down_on_power_up(adev, false);
5626 			cz_enable_sck_slow_down_on_power_down(adev, false);
5627 		}
5628 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5629 			cz_enable_cp_power_gating(adev, true);
5630 		else
5631 			cz_enable_cp_power_gating(adev, false);
5632 
5633 		cz_update_gfx_cg_power_gating(adev, enable);
5634 
5635 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5636 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5637 		else
5638 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5639 
5640 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5641 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5642 		else
5643 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5644 		break;
5645 	case CHIP_POLARIS11:
5646 	case CHIP_POLARIS12:
5647 	case CHIP_VEGAM:
5648 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5649 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5650 		else
5651 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5652 
5653 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5654 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5655 		else
5656 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5657 
5658 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5659 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5660 		else
5661 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5662 		break;
5663 	default:
5664 		break;
5665 	}
5666 
5667 	return 0;
5668 }
5669 
5670 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5671 {
5672 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5673 	int data;
5674 
5675 	if (amdgpu_sriov_vf(adev))
5676 		*flags = 0;
5677 
5678 	/* AMD_CG_SUPPORT_GFX_MGCG */
5679 	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5680 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5681 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5682 
5683 	/* AMD_CG_SUPPORT_GFX_CGCG */
5684 	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5685 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5686 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5687 
5688 	/* AMD_CG_SUPPORT_GFX_CGLS */
5689 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5690 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5691 
5692 	/* AMD_CG_SUPPORT_GFX_CGTS */
5693 	data = RREG32(mmCGTS_SM_CTRL_REG);
5694 	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5695 		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5696 
5697 	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5698 	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5699 		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5700 
5701 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5702 	data = RREG32(mmRLC_MEM_SLP_CNTL);
5703 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5704 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5705 
5706 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5707 	data = RREG32(mmCP_MEM_SLP_CNTL);
5708 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5709 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5710 }
5711 
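/*
 * Broadcast a BPM command over the RLC serdes lines: select every
 * SE/SH, address all CU and non-CU serdes masters, then pack the
 * command (SET_BPM_SERDES_CMD or CLE_BPM_SERDES_CMD) and the target
 * BPM register into RLC_SERDES_WR_CTRL. Note that the STONEY path does
 * not include the BPM_DATA/REG_ADDR fields in the clear mask.
 */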
5712 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5713 				     uint32_t reg_addr, uint32_t cmd)
5714 {
5715 	uint32_t data;
5716 
5717 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5718 
5719 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5720 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5721 
5722 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5723 	if (adev->asic_type == CHIP_STONEY)
5724 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5725 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5726 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5727 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5728 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5729 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5730 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5731 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5732 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5733 	else
5734 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5735 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5736 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5737 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5738 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5739 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5740 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5741 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5742 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5743 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5744 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5745 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5746 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5747 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5748 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5749 
5750 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5751 }
5752 
5753 #define MSG_ENTER_RLC_SAFE_MODE     1
5754 #define MSG_EXIT_RLC_SAFE_MODE      0
5755 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5756 #define RLC_GPR_REG2__REQ__SHIFT 0
5757 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5758 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5759 
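/*
 * Ask the RLC to enter safe mode before CG registers are touched. The
 * request is only made while the RLC F32 core is running and CGCG/MGCG
 * is enabled: CMD plus MESSAGE=1 goes into RLC_SAFE_MODE, then we poll
 * (up to adev->usec_timeout) for the GFX clock/power status bits and
 * for the RLC to acknowledge by clearing the CMD bit.
 */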
5760 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5761 {
5762 	u32 data;
5763 	unsigned i;
5764 
5765 	data = RREG32(mmRLC_CNTL);
5766 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5767 		return;
5768 
5769 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5770 		data |= RLC_SAFE_MODE__CMD_MASK;
5771 		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5772 		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5773 		WREG32(mmRLC_SAFE_MODE, data);
5774 
5775 		for (i = 0; i < adev->usec_timeout; i++) {
5776 			if ((RREG32(mmRLC_GPM_STAT) &
5777 			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5778 			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5779 			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5780 			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5781 				break;
5782 			udelay(1);
5783 		}
5784 
5785 		for (i = 0; i < adev->usec_timeout; i++) {
5786 			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5787 				break;
5788 			udelay(1);
5789 		}
5790 		adev->gfx.rlc.in_safe_mode = true;
5791 	}
5792 }
5793 
5794 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5795 {
5796 	u32 data = 0;
5797 	unsigned i;
5798 
5799 	data = RREG32(mmRLC_CNTL);
5800 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5801 		return;
5802 
5803 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5804 		if (adev->gfx.rlc.in_safe_mode) {
5805 			data |= RLC_SAFE_MODE__CMD_MASK;
5806 			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5807 			WREG32(mmRLC_SAFE_MODE, data);
5808 			adev->gfx.rlc.in_safe_mode = false;
5809 		}
5810 	}
5811 
5812 	for (i = 0; i < adev->usec_timeout; i++) {
5813 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5814 			break;
5815 		udelay(1);
5816 	}
5817 }
5818 
5819 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5820 	.enter_safe_mode = iceland_enter_rlc_safe_mode,
5821 	.exit_safe_mode = iceland_exit_rlc_safe_mode
5822 };
5823 
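/*
 * Medium grain clock gating (MGCG). The enable path turns on RLC/CP
 * memory light sleep where supported, clears the MGCG override bits
 * (keeping GRBM overridden only on APUs), clears the BPM MGCG override
 * through the serdes, and finally programs CGTS tree-shade gating. The
 * disable path mirrors this in reverse and restores the overrides.
 * Both paths run under RLC safe mode with serdes idle waits in between.
 */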
5824 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5825 						      bool enable)
5826 {
5827 	uint32_t temp, data;
5828 
5829 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5830 
5831 	/* It is disabled by HW by default */
5832 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5833 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5834 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5835 				/* 1 - RLC memory Light sleep */
5836 				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5837 
5838 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5839 				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5840 		}
5841 
5842 		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5843 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5844 		if (adev->flags & AMD_IS_APU)
5845 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5846 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5847 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5848 		else
5849 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5850 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5851 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5852 				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5853 
5854 		if (temp != data)
5855 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5856 
5857 		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5858 		gfx_v8_0_wait_for_rlc_serdes(adev);
5859 
5860 		/* 5 - clear mgcg override */
5861 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5862 
5863 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5864 			/* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
5865 			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5866 			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5867 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5868 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5869 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5870 			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5871 			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5872 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5873 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5874 			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5875 			if (temp != data)
5876 				WREG32(mmCGTS_SM_CTRL_REG, data);
5877 		}
5878 		udelay(50);
5879 
5880 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5881 		gfx_v8_0_wait_for_rlc_serdes(adev);
5882 	} else {
5883 		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5884 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5885 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5886 				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5887 				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5888 				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5889 		if (temp != data)
5890 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5891 
5892 		/* 2 - disable MGLS in RLC */
5893 		data = RREG32(mmRLC_MEM_SLP_CNTL);
5894 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5895 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5896 			WREG32(mmRLC_MEM_SLP_CNTL, data);
5897 		}
5898 
5899 		/* 3 - disable MGLS in CP */
5900 		data = RREG32(mmCP_MEM_SLP_CNTL);
5901 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5902 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5903 			WREG32(mmCP_MEM_SLP_CNTL, data);
5904 		}
5905 
5906 		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5907 		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5908 		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5909 				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5910 		if (temp != data)
5911 			WREG32(mmCGTS_SM_CTRL_REG, data);
5912 
5913 		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5914 		gfx_v8_0_wait_for_rlc_serdes(adev);
5915 
5916 		/* 6 - set mgcg override */
5917 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5918 
5919 		udelay(50);
5920 
5921 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5922 		gfx_v8_0_wait_for_rlc_serdes(adev);
5923 	}
5924 
5925 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5926 }
5927 
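/*
 * Coarse grain clock gating (CGCG/CGLS). Enable: drop the CGCG (and,
 * when supported, CGLS) override bits, clear the BPM CGCG override and
 * set CGLS through the serdes, then set the enable bits in
 * RLC_CGCG_CGLS_CTRL and re-enable the GUI idle interrupts. Disable:
 * mask the GUI idle interrupts first, restore the overrides, wake the
 * block with dummy CB register reads, clear CGLS via serdes and clear
 * both enable bits.
 */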
5928 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5929 						      bool enable)
5930 {
5931 	uint32_t temp, temp1, data, data1;
5932 
5933 	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5934 
5935 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5936 
5937 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5938 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5939 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5940 		if (temp1 != data1)
5941 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5942 
5943 		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5944 		gfx_v8_0_wait_for_rlc_serdes(adev);
5945 
5946 		/* 2 - clear cgcg override */
5947 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5948 
5949 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5950 		gfx_v8_0_wait_for_rlc_serdes(adev);
5951 
5952 		/* 3 - write cmd to set CGLS */
5953 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5954 
5955 		/* 4 - enable cgcg */
5956 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5957 
5958 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5959 			/* enable cgls*/
5960 			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5961 
5962 			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5963 			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5964 
5965 			if (temp1 != data1)
5966 				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5967 		} else {
5968 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5969 		}
5970 
5971 		if (temp != data)
5972 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5973 
5974 		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5975 		 * Cmp_busy/GFX_Idle interrupts
5976 		 */
5977 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5978 	} else {
5979 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5980 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5981 
5982 		/* TEST CGCG */
5983 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5984 		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5985 				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5986 		if (temp1 != data1)
5987 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5988 
5989 		/* read gfx register to wake up cgcg */
5990 		RREG32(mmCB_CGTT_SCLK_CTRL);
5991 		RREG32(mmCB_CGTT_SCLK_CTRL);
5992 		RREG32(mmCB_CGTT_SCLK_CTRL);
5993 		RREG32(mmCB_CGTT_SCLK_CTRL);
5994 
5995 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5996 		gfx_v8_0_wait_for_rlc_serdes(adev);
5997 
5998 		/* write cmd to Set CGCG Override */
5999 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6000 
6001 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6002 		gfx_v8_0_wait_for_rlc_serdes(adev);
6003 
6004 		/* write cmd to Clear CGLS */
6005 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6006 
6007 		/* disable cgcg, cgls should be disabled too. */
6008 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6009 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6010 		if (temp != data)
6011 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6012 		/* enable interrupts again for PG */
6013 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6014 	}
6015 
6016 	gfx_v8_0_wait_for_rlc_serdes(adev);
6017 
6018 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
6019 }
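
/*
 * Ordering matters: MGCG/MGLS/TS must be up before CGCG/CGLS is
 * enabled, and CGCG/CGLS must come down first when disabling, hence
 * the two branches below invert the call order.
 */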
6020 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6021 					    bool enable)
6022 {
6023 	if (enable) {
6024 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6025 		 * ===  MGCG + MGLS + TS(CG/LS) ===
6026 		 */
6027 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6028 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6029 	} else {
6030 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6031 		 * ===  CGCG + CGLS ===
6032 		 */
6033 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6034 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6035 	}
6036 	return 0;
6037 }
6038 
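/*
 * Tonga (and Polaris, below) request clock gating from the SMU instead
 * of programming it directly: for each hardware block a message is
 * assembled with PP_CG_MSG_ID() from the group, the block, the states
 * the driver supports (PP_STATE_SUPPORT_CG/LS) and the states to
 * enter, then handed to powerplay via
 * amdgpu_dpm_set_clockgating_by_smu(). An UNGATE request keeps the
 * support bits but clears the target state.
 */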
6039 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6040 					  enum amd_clockgating_state state)
6041 {
6042 	uint32_t msg_id, pp_state = 0;
6043 	uint32_t pp_support_state = 0;
6044 
6045 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6046 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6047 			pp_support_state = PP_STATE_SUPPORT_LS;
6048 			pp_state = PP_STATE_LS;
6049 		}
6050 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6051 			pp_support_state |= PP_STATE_SUPPORT_CG;
6052 			pp_state |= PP_STATE_CG;
6053 		}
6054 		if (state == AMD_CG_STATE_UNGATE)
6055 			pp_state = 0;
6056 
6057 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6058 				PP_BLOCK_GFX_CG,
6059 				pp_support_state,
6060 				pp_state);
6061 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6062 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6063 	}
6064 
6065 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6066 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6067 			pp_support_state = PP_STATE_SUPPORT_LS;
6068 			pp_state = PP_STATE_LS;
6069 		}
6070 
6071 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6072 			pp_support_state |= PP_STATE_SUPPORT_CG;
6073 			pp_state |= PP_STATE_CG;
6074 		}
6075 
6076 		if (state == AMD_CG_STATE_UNGATE)
6077 			pp_state = 0;
6078 
6079 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6080 				PP_BLOCK_GFX_MG,
6081 				pp_support_state,
6082 				pp_state);
6083 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6084 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6085 	}
6086 
6087 	return 0;
6088 }
6089 
6090 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6091 					  enum amd_clockgating_state state)
6092 {
6093 
6094 	uint32_t msg_id, pp_state = 0;
6095 	uint32_t pp_support_state = 0;
6096 
6097 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6098 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6099 			pp_support_state = PP_STATE_SUPPORT_LS;
6100 			pp_state = PP_STATE_LS;
6101 		}
6102 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6103 			pp_support_state |= PP_STATE_SUPPORT_CG;
6104 			pp_state |= PP_STATE_CG;
6105 		}
6106 		if (state == AMD_CG_STATE_UNGATE)
6107 			pp_state = 0;
6108 
6109 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6110 				PP_BLOCK_GFX_CG,
6111 				pp_support_state,
6112 				pp_state);
6113 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6114 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6115 	}
6116 
6117 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6118 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6119 			pp_support_state = PP_STATE_SUPPORT_LS;
6120 			pp_state = PP_STATE_LS;
6121 		}
6122 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6123 			pp_support_state |= PP_STATE_SUPPORT_CG;
6124 			pp_state |= PP_STATE_CG;
6125 		}
6126 		if (state == AMD_CG_STATE_UNGATE)
6127 			pp_state = 0;
6128 
6129 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6130 				PP_BLOCK_GFX_3D,
6131 				pp_support_state,
6132 				pp_state);
6133 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6134 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6135 	}
6136 
6137 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6138 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6139 			pp_support_state = PP_STATE_SUPPORT_LS;
6140 			pp_state = PP_STATE_LS;
6141 		}
6142 
6143 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6144 			pp_support_state |= PP_STATE_SUPPORT_CG;
6145 			pp_state |= PP_STATE_CG;
6146 		}
6147 
6148 		if (state == AMD_CG_STATE_UNGATE)
6149 			pp_state = 0;
6150 
6151 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6152 				PP_BLOCK_GFX_MG,
6153 				pp_support_state,
6154 				pp_state);
6155 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6156 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6157 	}
6158 
6159 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6160 		pp_support_state = PP_STATE_SUPPORT_LS;
6161 
6162 		if (state == AMD_CG_STATE_UNGATE)
6163 			pp_state = 0;
6164 		else
6165 			pp_state = PP_STATE_LS;
6166 
6167 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6168 				PP_BLOCK_GFX_RLC,
6169 				pp_support_state,
6170 				pp_state);
6171 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6172 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6173 	}
6174 
6175 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6176 		pp_support_state = PP_STATE_SUPPORT_LS;
6177 
6178 		if (state == AMD_CG_STATE_UNGATE)
6179 			pp_state = 0;
6180 		else
6181 			pp_state = PP_STATE_LS;
6182 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6183 			PP_BLOCK_GFX_CP,
6184 			pp_support_state,
6185 			pp_state);
6186 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6187 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6188 	}
6189 
6190 	return 0;
6191 }
6192 
6193 static int gfx_v8_0_set_clockgating_state(void *handle,
6194 					  enum amd_clockgating_state state)
6195 {
6196 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6197 
6198 	if (amdgpu_sriov_vf(adev))
6199 		return 0;
6200 
6201 	switch (adev->asic_type) {
6202 	case CHIP_FIJI:
6203 	case CHIP_CARRIZO:
6204 	case CHIP_STONEY:
6205 		gfx_v8_0_update_gfx_clock_gating(adev,
6206 						 state == AMD_CG_STATE_GATE);
6207 		break;
6208 	case CHIP_TONGA:
6209 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6210 		break;
6211 	case CHIP_POLARIS10:
6212 	case CHIP_POLARIS11:
6213 	case CHIP_POLARIS12:
6214 	case CHIP_VEGAM:
6215 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6216 		break;
6217 	default:
6218 		break;
6219 	}
6220 	return 0;
6221 }
6222 
6223 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6224 {
6225 	return ring->adev->wb.wb[ring->rptr_offs];
6226 }
6227 
6228 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6229 {
6230 	struct amdgpu_device *adev = ring->adev;
6231 
6232 	if (ring->use_doorbell)
6233 		/* XXX check if swapping is necessary on BE */
6234 		return ring->adev->wb.wb[ring->wptr_offs];
6235 	else
6236 		return RREG32(mmCP_RB0_WPTR);
6237 }
6238 
6239 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6240 {
6241 	struct amdgpu_device *adev = ring->adev;
6242 
6243 	if (ring->use_doorbell) {
6244 		/* XXX check if swapping is necessary on BE */
6245 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6246 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6247 	} else {
6248 		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6249 		(void)RREG32(mmCP_RB0_WPTR);
6250 	}
6251 }
6252 
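/*
 * HDP flush as a WAIT_REG_MEM write/wait/write: the packet writes
 * ref_and_mask into GPU_HDP_FLUSH_REQ and spins until the same bit
 * shows up in GPU_HDP_FLUSH_DONE. Compute and KIQ rings use their
 * per-pipe CP2/CP6 request bits (shifted by ring->pipe); the gfx ring
 * uses CP0 and runs the wait on the PFP.
 */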
6253 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6254 {
6255 	u32 ref_and_mask, reg_mem_engine;
6256 
6257 	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6258 	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6259 		switch (ring->me) {
6260 		case 1:
6261 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6262 			break;
6263 		case 2:
6264 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6265 			break;
6266 		default:
6267 			return;
6268 		}
6269 		reg_mem_engine = 0;
6270 	} else {
6271 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6272 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6273 	}
6274 
6275 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6276 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6277 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6278 				 reg_mem_engine));
6279 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6280 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6281 	amdgpu_ring_write(ring, ref_and_mask);
6282 	amdgpu_ring_write(ring, ref_and_mask);
6283 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6284 }
6285 
6286 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6287 {
6288 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6289 	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6290 		EVENT_INDEX(4));
6291 
6292 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6293 	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6294 		EVENT_INDEX(0));
6295 }
6296 
6297 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6298 				      struct amdgpu_ib *ib,
6299 				      unsigned vmid, bool ctx_switch)
6300 {
6301 	u32 header, control = 0;
6302 
6303 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6304 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6305 	else
6306 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6307 
6308 	control |= ib->length_dw | (vmid << 24);
6309 
6310 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6311 		control |= INDIRECT_BUFFER_PRE_ENB(1);
6312 
6313 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6314 			gfx_v8_0_ring_emit_de_meta(ring);
6315 	}
6316 
6317 	amdgpu_ring_write(ring, header);
6318 	amdgpu_ring_write(ring,
6319 #ifdef __BIG_ENDIAN
6320 			  (2 << 0) |
6321 #endif
6322 			  (ib->gpu_addr & 0xFFFFFFFC));
6323 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6324 	amdgpu_ring_write(ring, control);
6325 }
6326 
6327 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6328 					  struct amdgpu_ib *ib,
6329 					  unsigned vmid, bool ctx_switch)
6330 {
6331 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6332 
6333 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6334 	amdgpu_ring_write(ring,
6335 #ifdef __BIG_ENDIAN
6336 				(2 << 0) |
6337 #endif
6338 				(ib->gpu_addr & 0xFFFFFFFC));
6339 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6340 	amdgpu_ring_write(ring, control);
6341 }
6342 
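/*
 * Gfx fence: an EVENT_WRITE_EOP that flushes and writes back the
 * TC/TCL1 caches, then writes the 32- or 64-bit seq to addr (DATA_SEL)
 * and optionally raises an interrupt (INT_SEL). Compute rings use
 * RELEASE_MEM instead, see gfx_v8_0_ring_emit_fence_compute().
 */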
6343 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6344 					 u64 seq, unsigned flags)
6345 {
6346 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6347 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6348 
6349 	/* EVENT_WRITE_EOP - flush caches, send int */
6350 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6351 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6352 				 EOP_TC_ACTION_EN |
6353 				 EOP_TC_WB_ACTION_EN |
6354 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6355 				 EVENT_INDEX(5)));
6356 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6357 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6358 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6359 	amdgpu_ring_write(ring, lower_32_bits(seq));
6360 	amdgpu_ring_write(ring, upper_32_bits(seq));
6361 
6362 }
6363 
6364 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6365 {
6366 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6367 	uint32_t seq = ring->fence_drv.sync_seq;
6368 	uint64_t addr = ring->fence_drv.gpu_addr;
6369 
6370 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6371 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6372 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6373 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6374 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6375 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6376 	amdgpu_ring_write(ring, seq);
6377 	amdgpu_ring_write(ring, 0xffffffff);
6378 	amdgpu_ring_write(ring, 4); /* poll interval */
6379 }
6380 
6381 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6382 					unsigned vmid, uint64_t pd_addr)
6383 {
6384 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6385 
6386 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6387 
6388 	/* wait for the invalidate to complete */
6389 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6390 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6391 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6392 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6393 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6394 	amdgpu_ring_write(ring, 0);
6395 	amdgpu_ring_write(ring, 0); /* ref */
6396 	amdgpu_ring_write(ring, 0); /* mask */
6397 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6398 
6399 	/* compute doesn't have PFP */
6400 	if (usepfp) {
6401 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6402 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6403 		amdgpu_ring_write(ring, 0x0);
6404 	}
6405 }
6406 
6407 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6408 {
6409 	return ring->adev->wb.wb[ring->wptr_offs];
6410 }
6411 
6412 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6413 {
6414 	struct amdgpu_device *adev = ring->adev;
6415 
6416 	/* XXX check if swapping is necessary on BE */
6417 	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6418 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6419 }
6420 
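/*
 * Throttle or restore a pipe's share of the SPI: acquire selects the
 * full SPI_WCL_PIPE_PERCENT value, otherwise the pipe drops to the
 * minimum (0x1). The per-pipe registers follow
 * mmSPI_WCL_PIPE_PERCENT_GFX; the first ME only has the GFX and HP3D
 * entries, so later MEs skip those two slots.
 */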
6421 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6422 					   bool acquire)
6423 {
6424 	struct amdgpu_device *adev = ring->adev;
6425 	int pipe_num, tmp, reg;
6426 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6427 
6428 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6429 
6430 	/* first me only has 2 entries, GFX and HP3D */
6431 	if (ring->me > 0)
6432 		pipe_num -= 2;
6433 
6434 	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6435 	tmp = RREG32(reg);
6436 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6437 	WREG32(reg, tmp);
6438 }
6439 
6440 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6441 					    struct amdgpu_ring *ring,
6442 					    bool acquire)
6443 {
6444 	int i, pipe;
6445 	bool reserve;
6446 	struct amdgpu_ring *iring;
6447 
6448 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
6449 	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6450 	if (acquire)
6451 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6452 	else
6453 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6454 
6455 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6456 		/* Clear all reservations - everyone reacquires all resources */
6457 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6458 			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6459 						       true);
6460 
6461 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6462 			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6463 						       true);
6464 	} else {
6465 		/* Lower all pipes without a current reservation */
6466 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6467 			iring = &adev->gfx.gfx_ring[i];
6468 			pipe = amdgpu_gfx_queue_to_bit(adev,
6469 						       iring->me,
6470 						       iring->pipe,
6471 						       0);
6472 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6473 			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6474 		}
6475 
6476 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6477 			iring = &adev->gfx.compute_ring[i];
6478 			pipe = amdgpu_gfx_queue_to_bit(adev,
6479 						       iring->me,
6480 						       iring->pipe,
6481 						       0);
6482 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6483 			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6484 		}
6485 	}
6486 
6487 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6488 }
6489 
6490 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6491 				      struct amdgpu_ring *ring,
6492 				      bool acquire)
6493 {
6494 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6495 	uint32_t queue_priority = acquire ? 0xf : 0x0;
6496 
6497 	mutex_lock(&adev->srbm_mutex);
6498 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6499 
6500 	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6501 	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6502 
6503 	vi_srbm_select(adev, 0, 0, 0, 0);
6504 	mutex_unlock(&adev->srbm_mutex);
6505 }
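
/*
 * DRM_SCHED_PRIORITY_HIGH_HW raises the ring's HQD pipe/queue priority
 * and reserves its pipe (throttling all unreserved pipes above); any
 * other priority releases both again. Only meaningful for compute
 * rings.
 */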
6506 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6507 					       enum drm_sched_priority priority)
6508 {
6509 	struct amdgpu_device *adev = ring->adev;
6510 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6511 
6512 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6513 		return;
6514 
6515 	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6516 	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6517 }
6518 
6519 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6520 					     u64 addr, u64 seq,
6521 					     unsigned flags)
6522 {
6523 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6524 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6525 
6526 	/* RELEASE_MEM - flush caches, send int */
6527 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6528 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6529 				 EOP_TC_ACTION_EN |
6530 				 EOP_TC_WB_ACTION_EN |
6531 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6532 				 EVENT_INDEX(5)));
6533 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6534 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6535 	amdgpu_ring_write(ring, upper_32_bits(addr));
6536 	amdgpu_ring_write(ring, lower_32_bits(seq));
6537 	amdgpu_ring_write(ring, upper_32_bits(seq));
6538 }
6539 
6540 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6541 					 u64 seq, unsigned int flags)
6542 {
6543 	/* we only allocate 32bit for each seq wb address */
6544 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6545 
6546 	/* write fence seq to the "addr" */
6547 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6548 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6549 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6550 	amdgpu_ring_write(ring, lower_32_bits(addr));
6551 	amdgpu_ring_write(ring, upper_32_bits(addr));
6552 	amdgpu_ring_write(ring, lower_32_bits(seq));
6553 
6554 	if (flags & AMDGPU_FENCE_FLAG_INT) {
6555 		/* set register to trigger INT */
6556 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6557 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6558 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6559 		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6560 		amdgpu_ring_write(ring, 0);
6561 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6562 	}
6563 }
6564 
6565 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6566 {
6567 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6568 	amdgpu_ring_write(ring, 0);
6569 }
6570 
6571 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6572 {
6573 	uint32_t dw2 = 0;
6574 
6575 	if (amdgpu_sriov_vf(ring->adev))
6576 		gfx_v8_0_ring_emit_ce_meta(ring);
6577 
6578 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6579 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6580 		gfx_v8_0_ring_emit_vgt_flush(ring);
6581 		/* set load_global_config & load_global_uconfig */
6582 		dw2 |= 0x8001;
6583 		/* set load_cs_sh_regs */
6584 		dw2 |= 0x01000000;
6585 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6586 		dw2 |= 0x10002;
6587 
6588 		/* set load_ce_ram if preamble presented */
6589 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6590 			dw2 |= 0x10000000;
6591 	} else {
6592 		/* still load_ce_ram if this is the first time a preamble is
6593 		 * presented, even though no context switch happens.
6594 		 */
6595 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6596 			dw2 |= 0x10000000;
6597 	}
6598 
6599 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6600 	amdgpu_ring_write(ring, dw2);
6601 	amdgpu_ring_write(ring, 0);
6602 }
6603 
6604 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6605 {
6606 	unsigned ret;
6607 
6608 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6609 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6610 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6611 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6612 	ret = ring->wptr & ring->buf_mask;
6613 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6614 	return ret;
6615 }
6616 
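/*
 * Patch the dummy dword emitted by gfx_v8_0_ring_emit_init_cond_exec()
 * with the number of dwords the CP must skip when the condition fails,
 * i.e. the distance from the patch slot to the current write pointer,
 * accounting for ring wrap. Illustrative numbers: on a 1024-dword ring
 * with offset == 1020 and wptr wrapped to 4, cur == 3, so the patched
 * count is 1024 - 1020 + 3 = 7 dwords.
 */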
6617 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6618 {
6619 	unsigned cur;
6620 
6621 	BUG_ON(offset > ring->buf_mask);
6622 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6623 
6624 	cur = (ring->wptr & ring->buf_mask) - 1;
6625 	if (likely(cur > offset))
6626 		ring->ring[offset] = cur - offset;
6627 	else
6628 		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6629 }
6630 
6631 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6632 {
6633 	struct amdgpu_device *adev = ring->adev;
6634 
6635 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6636 	amdgpu_ring_write(ring, 0 |	/* src: register*/
6637 				(5 << 8) |	/* dst: memory */
6638 				(1 << 20));	/* write confirm */
6639 	amdgpu_ring_write(ring, reg);
6640 	amdgpu_ring_write(ring, 0);
6641 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6642 				adev->virt.reg_val_offs * 4));
6643 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6644 				adev->virt.reg_val_offs * 4));
6645 }
6646 
6647 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6648 				  uint32_t val)
6649 {
6650 	uint32_t cmd;
6651 
6652 	switch (ring->funcs->type) {
6653 	case AMDGPU_RING_TYPE_GFX:
6654 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6655 		break;
6656 	case AMDGPU_RING_TYPE_KIQ:
6657 		cmd = 1 << 16; /* no inc addr */
6658 		break;
6659 	default:
6660 		cmd = WR_CONFIRM;
6661 		break;
6662 	}
6663 
6664 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6665 	amdgpu_ring_write(ring, cmd);
6666 	amdgpu_ring_write(ring, reg);
6667 	amdgpu_ring_write(ring, 0);
6668 	amdgpu_ring_write(ring, val);
6669 }
6670 
6671 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6672 						 enum amdgpu_interrupt_state state)
6673 {
6674 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6675 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6676 }
6677 
6678 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6679 						     int me, int pipe,
6680 						     enum amdgpu_interrupt_state state)
6681 {
6682 	u32 mec_int_cntl, mec_int_cntl_reg;
6683 
6684 	/*
6685 	 * amdgpu controls only the first MEC. That's why this function only
6686 	 * handles the setting of interrupts for this specific MEC. All other
6687 	 * pipes' interrupts are set by amdkfd.
6688 	 */
6689 
6690 	if (me == 1) {
6691 		switch (pipe) {
6692 		case 0:
6693 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6694 			break;
6695 		case 1:
6696 			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6697 			break;
6698 		case 2:
6699 			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6700 			break;
6701 		case 3:
6702 			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6703 			break;
6704 		default:
6705 			DRM_DEBUG("invalid pipe %d\n", pipe);
6706 			return;
6707 		}
6708 	} else {
6709 		DRM_DEBUG("invalid me %d\n", me);
6710 		return;
6711 	}
6712 
6713 	switch (state) {
6714 	case AMDGPU_IRQ_STATE_DISABLE:
6715 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6716 		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6717 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6718 		break;
6719 	case AMDGPU_IRQ_STATE_ENABLE:
6720 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6721 		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6722 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6723 		break;
6724 	default:
6725 		break;
6726 	}
6727 }
6728 
6729 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6730 					     struct amdgpu_irq_src *source,
6731 					     unsigned type,
6732 					     enum amdgpu_interrupt_state state)
6733 {
6734 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6735 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6736 
6737 	return 0;
6738 }
6739 
6740 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6741 					      struct amdgpu_irq_src *source,
6742 					      unsigned type,
6743 					      enum amdgpu_interrupt_state state)
6744 {
6745 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6746 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6747 
6748 	return 0;
6749 }
6750 
6751 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6752 					    struct amdgpu_irq_src *src,
6753 					    unsigned type,
6754 					    enum amdgpu_interrupt_state state)
6755 {
6756 	switch (type) {
6757 	case AMDGPU_CP_IRQ_GFX_EOP:
6758 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6759 		break;
6760 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6761 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6762 		break;
6763 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6764 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6765 		break;
6766 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6767 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6768 		break;
6769 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6770 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6771 		break;
6772 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6773 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6774 		break;
6775 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6776 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6777 		break;
6778 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6779 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6780 		break;
6781 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6782 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6783 		break;
6784 	default:
6785 		break;
6786 	}
6787 	return 0;
6788 }
6789 
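/*
 * CP EOP interrupt. The IV ring_id encodes the source queue as decoded
 * below:
 *   bits [1:0] = pipe_id, bits [3:2] = me_id, bits [6:4] = queue_id
 * me 0 is the gfx ring; for me 1/2 every compute ring is matched,
 * since MEC interrupts can only be enabled per pipe rather than per
 * queue.
 */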
6790 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6791 			    struct amdgpu_irq_src *source,
6792 			    struct amdgpu_iv_entry *entry)
6793 {
6794 	int i;
6795 	u8 me_id, pipe_id, queue_id;
6796 	struct amdgpu_ring *ring;
6797 
6798 	DRM_DEBUG("IH: CP EOP\n");
6799 	me_id = (entry->ring_id & 0x0c) >> 2;
6800 	pipe_id = (entry->ring_id & 0x03) >> 0;
6801 	queue_id = (entry->ring_id & 0x70) >> 4;
6802 
6803 	switch (me_id) {
6804 	case 0:
6805 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6806 		break;
6807 	case 1:
6808 	case 2:
6809 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6810 			ring = &adev->gfx.compute_ring[i];
6811 			/* Per-queue interrupt is supported for MEC starting from VI.
6812 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6813 			 */
6814 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6815 				amdgpu_fence_process(ring);
6816 		}
6817 		break;
6818 	}
6819 	return 0;
6820 }
6821 
6822 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6823 				 struct amdgpu_irq_src *source,
6824 				 struct amdgpu_iv_entry *entry)
6825 {
6826 	DRM_ERROR("Illegal register access in command stream\n");
6827 	schedule_work(&adev->reset_work);
6828 	return 0;
6829 }
6830 
6831 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6832 				  struct amdgpu_irq_src *source,
6833 				  struct amdgpu_iv_entry *entry)
6834 {
6835 	DRM_ERROR("Illegal instruction in command stream\n");
6836 	schedule_work(&adev->reset_work);
6837 	return 0;
6838 }
6839 
6840 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6841 					    struct amdgpu_irq_src *src,
6842 					    unsigned int type,
6843 					    enum amdgpu_interrupt_state state)
6844 {
6845 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6846 
6847 	switch (type) {
6848 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6849 		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6850 			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6851 		if (ring->me == 1)
6852 			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6853 				     ring->pipe,
6854 				     GENERIC2_INT_ENABLE,
6855 				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6856 		else
6857 			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6858 				     ring->pipe,
6859 				     GENERIC2_INT_ENABLE,
6860 				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6861 		break;
6862 	default:
6863 		BUG(); /* kiq only support GENERIC2_INT now */
6864 		break;
6865 	}
6866 	return 0;
6867 }
6868 
6869 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6870 			    struct amdgpu_irq_src *source,
6871 			    struct amdgpu_iv_entry *entry)
6872 {
6873 	u8 me_id, pipe_id, queue_id;
6874 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6875 
6876 	me_id = (entry->ring_id & 0x0c) >> 2;
6877 	pipe_id = (entry->ring_id & 0x03) >> 0;
6878 	queue_id = (entry->ring_id & 0x70) >> 4;
6879 	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6880 		   me_id, pipe_id, queue_id);
6881 
6882 	amdgpu_fence_process(ring);
6883 	return 0;
6884 }
6885 
6886 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6887 	.name = "gfx_v8_0",
6888 	.early_init = gfx_v8_0_early_init,
6889 	.late_init = gfx_v8_0_late_init,
6890 	.sw_init = gfx_v8_0_sw_init,
6891 	.sw_fini = gfx_v8_0_sw_fini,
6892 	.hw_init = gfx_v8_0_hw_init,
6893 	.hw_fini = gfx_v8_0_hw_fini,
6894 	.suspend = gfx_v8_0_suspend,
6895 	.resume = gfx_v8_0_resume,
6896 	.is_idle = gfx_v8_0_is_idle,
6897 	.wait_for_idle = gfx_v8_0_wait_for_idle,
6898 	.check_soft_reset = gfx_v8_0_check_soft_reset,
6899 	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6900 	.soft_reset = gfx_v8_0_soft_reset,
6901 	.post_soft_reset = gfx_v8_0_post_soft_reset,
6902 	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6903 	.set_powergating_state = gfx_v8_0_set_powergating_state,
6904 	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6905 };
6906 
6907 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6908 	.type = AMDGPU_RING_TYPE_GFX,
6909 	.align_mask = 0xff,
6910 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6911 	.support_64bit_ptrs = false,
6912 	.get_rptr = gfx_v8_0_ring_get_rptr,
6913 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6914 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6915 	.emit_frame_size = /* maximum 215dw if counting 16 IBs in */
6916 		5 +  /* COND_EXEC */
6917 		7 +  /* PIPELINE_SYNC */
6918 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6919 		8 +  /* FENCE for VM_FLUSH */
6920 		20 + /* GDS switch */
6921 		4 + /* double SWITCH_BUFFER,
6922 		       the first COND_EXEC jumps to the place just
6923 		       prior to this double SWITCH_BUFFER */
6924 		5 + /* COND_EXEC */
6925 		7 +	 /*	HDP_flush */
6926 		4 +	 /*	VGT_flush */
6927 		14 + /*	CE_META */
6928 		31 + /*	DE_META */
6929 		3 + /* CNTX_CTRL */
6930 		5 + /* HDP_INVL */
6931 		8 + 8 + /* FENCE x2 */
6932 		2, /* SWITCH_BUFFER */
6933 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6934 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6935 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6936 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6937 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6938 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6939 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6940 	.test_ring = gfx_v8_0_ring_test_ring,
6941 	.test_ib = gfx_v8_0_ring_test_ib,
6942 	.insert_nop = amdgpu_ring_insert_nop,
6943 	.pad_ib = amdgpu_ring_generic_pad_ib,
6944 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6945 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6946 	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6947 	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6948 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6949 };
6950 
6951 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6952 	.type = AMDGPU_RING_TYPE_COMPUTE,
6953 	.align_mask = 0xff,
6954 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6955 	.support_64bit_ptrs = false,
6956 	.get_rptr = gfx_v8_0_ring_get_rptr,
6957 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6958 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6959 	.emit_frame_size =
6960 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6961 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6962 		5 + /* hdp_invalidate */
6963 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6964 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6965 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6966 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
6967 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6968 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6969 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6970 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6971 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6972 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6973 	.test_ring = gfx_v8_0_ring_test_ring,
6974 	.test_ib = gfx_v8_0_ring_test_ib,
6975 	.insert_nop = amdgpu_ring_insert_nop,
6976 	.pad_ib = amdgpu_ring_generic_pad_ib,
6977 	.set_priority = gfx_v8_0_ring_set_priority_compute,
6978 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6979 };
6980 
6981 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6982 	.type = AMDGPU_RING_TYPE_KIQ,
6983 	.align_mask = 0xff,
6984 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6985 	.support_64bit_ptrs = false,
6986 	.get_rptr = gfx_v8_0_ring_get_rptr,
6987 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6988 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6989 	.emit_frame_size =
6990 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6991 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6992 		5 + /* hdp_invalidate */
6993 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6994 		17 + /* gfx_v8_0_ring_emit_vm_flush */
6995 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6996 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
6997 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6998 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6999 	.test_ring = gfx_v8_0_ring_test_ring,
7000 	.test_ib = gfx_v8_0_ring_test_ib,
7001 	.insert_nop = amdgpu_ring_insert_nop,
7002 	.pad_ib = amdgpu_ring_generic_pad_ib,
7003 	.emit_rreg = gfx_v8_0_ring_emit_rreg,
7004 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7005 };
7006 
7007 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7008 {
7009 	int i;
7010 
7011 	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7012 
7013 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7014 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7015 
7016 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7017 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7018 }
7019 
7020 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7021 	.set = gfx_v8_0_set_eop_interrupt_state,
7022 	.process = gfx_v8_0_eop_irq,
7023 };
7024 
7025 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7026 	.set = gfx_v8_0_set_priv_reg_fault_state,
7027 	.process = gfx_v8_0_priv_reg_irq,
7028 };
7029 
7030 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7031 	.set = gfx_v8_0_set_priv_inst_fault_state,
7032 	.process = gfx_v8_0_priv_inst_irq,
7033 };
7034 
7035 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7036 	.set = gfx_v8_0_kiq_set_interrupt_state,
7037 	.process = gfx_v8_0_kiq_irq,
7038 };
7039 
7040 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7041 {
7042 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7043 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7044 
7045 	adev->gfx.priv_reg_irq.num_types = 1;
7046 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7047 
7048 	adev->gfx.priv_inst_irq.num_types = 1;
7049 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7050 
7051 	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7052 	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7053 }
7054 
7055 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7056 {
7057 	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7058 }
7059 
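/*
 * Split the on-chip GDS between gfx and CS clients based on the total
 * size reported by GDS_VMID0_SIZE: a 64KB GDS gets 4KB memory
 * partitions with 4-entry GWS slices, anything else gets 1KB memory
 * partitions with 16-entry GWS slices.
 */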
7060 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7061 {
7062 	/* init asic gds info */
7063 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7064 	adev->gds.gws.total_size = 64;
7065 	adev->gds.oa.total_size = 16;
7066 
7067 	if (adev->gds.mem.total_size == 64 * 1024) {
7068 		adev->gds.mem.gfx_partition_size = 4096;
7069 		adev->gds.mem.cs_partition_size = 4096;
7070 
7071 		adev->gds.gws.gfx_partition_size = 4;
7072 		adev->gds.gws.cs_partition_size = 4;
7073 
7074 		adev->gds.oa.gfx_partition_size = 4;
7075 		adev->gds.oa.cs_partition_size = 1;
7076 	} else {
7077 		adev->gds.mem.gfx_partition_size = 1024;
7078 		adev->gds.mem.cs_partition_size = 1024;
7079 
7080 		adev->gds.gws.gfx_partition_size = 16;
7081 		adev->gds.gws.cs_partition_size = 16;
7082 
7083 		adev->gds.oa.gfx_partition_size = 4;
7084 		adev->gds.oa.cs_partition_size = 4;
7085 	}
7086 }
7087 
7088 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7089 						 u32 bitmap)
7090 {
7091 	u32 data;
7092 
7093 	if (!bitmap)
7094 		return;
7095 
7096 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7097 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7098 
7099 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7100 }
7101 
7102 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7103 {
7104 	u32 data, mask;
7105 
7106 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7107 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7108 
7109 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7110 
7111 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7112 }
7113 
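/*
 * Walk every SE/SH pair, fold in the CU disable masks requested at
 * load time (amdgpu_gfx_parse_disable_cu()), and record both the
 * active-CU bitmaps and the "always on" CU mask built from the first
 * ao_cu_num active CUs per SH (two on APUs, a whole SH on dGPUs). The
 * waves/scratch/LDS figures at the bottom are fixed GFX8 limits.
 */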
7114 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7115 {
7116 	int i, j, k, counter, active_cu_number = 0;
7117 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7118 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7119 	unsigned disable_masks[4 * 2];
7120 	u32 ao_cu_num;
7121 
7122 	memset(cu_info, 0, sizeof(*cu_info));
7123 
7124 	if (adev->flags & AMD_IS_APU)
7125 		ao_cu_num = 2;
7126 	else
7127 		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7128 
7129 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7130 
7131 	mutex_lock(&adev->grbm_idx_mutex);
7132 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7133 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7134 			mask = 1;
7135 			ao_bitmap = 0;
7136 			counter = 0;
7137 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7138 			if (i < 4 && j < 2)
7139 				gfx_v8_0_set_user_cu_inactive_bitmap(
7140 					adev, disable_masks[i * 2 + j]);
7141 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7142 			cu_info->bitmap[i][j] = bitmap;
7143 
7144 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7145 				if (bitmap & mask) {
7146 					if (counter < ao_cu_num)
7147 						ao_bitmap |= mask;
7148 					counter++;
7149 				}
7150 				mask <<= 1;
7151 			}
7152 			active_cu_number += counter;
7153 			if (i < 2 && j < 2)
7154 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7155 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7156 		}
7157 	}
7158 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7159 	mutex_unlock(&adev->grbm_idx_mutex);
7160 
7161 	cu_info->number = active_cu_number;
7162 	cu_info->ao_cu_mask = ao_cu_mask;
7163 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7164 	cu_info->max_waves_per_simd = 10;
7165 	cu_info->max_scratch_slots_per_cu = 32;
7166 	cu_info->wave_front_size = 64;
7167 	cu_info->lds_size = 64;
7168 }
7169 
7170 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7171 {
7172 	.type = AMD_IP_BLOCK_TYPE_GFX,
7173 	.major = 8,
7174 	.minor = 0,
7175 	.rev = 0,
7176 	.funcs = &gfx_v8_0_ip_funcs,
7177 };
7178 
7179 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7180 {
7181 	.type = AMD_IP_BLOCK_TYPE_GFX,
7182 	.major = 8,
7183 	.minor = 1,
7184 	.rev = 0,
7185 	.funcs = &gfx_v8_0_ip_funcs,
7186 };
7187 
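/*
 * Write the CE metadata into the per-context save area (CSA). Only
 * used on SR-IOV VFs (see gfx_v8_ring_emit_cntxcntl()), where the host
 * needs it to preempt and restore constant engine state; the payload
 * layout and WRITE_DATA dword count depend on chained-IB support.
 * gfx_v8_0_ring_emit_de_meta() below mirrors this for the DE payload
 * and also records a GDS backup address at CSA + 4KB.
 */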
7188 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7189 {
7190 	uint64_t ce_payload_addr;
7191 	int cnt_ce;
7192 	union {
7193 		struct vi_ce_ib_state regular;
7194 		struct vi_ce_ib_state_chained_ib chained;
7195 	} ce_payload = {};
7196 
7197 	if (ring->adev->virt.chained_ib_support) {
7198 		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7199 			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7200 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7201 	} else {
7202 		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7203 			offsetof(struct vi_gfx_meta_data, ce_payload);
7204 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7205 	}
7206 
7207 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7208 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7209 				WRITE_DATA_DST_SEL(8) |
7210 				WR_CONFIRM) |
7211 				WRITE_DATA_CACHE_POLICY(0));
7212 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7213 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7214 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7215 }
7216 
7217 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7218 {
7219 	uint64_t de_payload_addr, gds_addr, csa_addr;
7220 	int cnt_de;
7221 	union {
7222 		struct vi_de_ib_state regular;
7223 		struct vi_de_ib_state_chained_ib chained;
7224 	} de_payload = {};
7225 
7226 	csa_addr = amdgpu_csa_vaddr(ring->adev);
7227 	gds_addr = csa_addr + 4096;
7228 	if (ring->adev->virt.chained_ib_support) {
7229 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7230 		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7231 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7232 		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7233 	} else {
7234 		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7235 		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7236 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7237 		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7238 	}
7239 
7240 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7241 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7242 				WRITE_DATA_DST_SEL(8) |
7243 				WR_CONFIRM) |
7244 				WRITE_DATA_CACHE_POLICY(0));
7245 	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7246 	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7247 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7248 }
7249