xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 2596e07a)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31 
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34 
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40 
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45 
46 #include "uvd/uvd_5_0_d.h"
47 #include "uvd/uvd_5_0_sh_mask.h"
48 
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51 
/* Number of rings exposed by the GFX v8 block. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* "Golden" GB_ADDR_CONFIG values per ASIC. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field builders for GB_TILE_MODE* / GB_MACROTILE_MODE* register values. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Bit masks for RLC_CGTT_MGCG_OVERRIDE (per-domain clock-gating override). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set vs. clear command codes. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0
79 
/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* sentinel: number of BPM registers */
};
89 
/*
 * Firmware images used by the supported VI-family ASICs: constant engine
 * (CE), prefetch parser (PFP), micro engine (ME), compute micro engine
 * (MEC, plus MEC2 where present) and the RLC.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

/* Stoney has no MEC2 firmware. */
MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

/* Topaz has no MEC2 firmware. */
MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
122 
/*
 * Per-VMID register offsets for GDS base/size, GWS and OA, indexed by
 * VMID (0-15).
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
142 
/*
 * Tonga A11 golden register settings.  Laid out as triplets of u32s
 * (register offset, mask, value); applied via
 * amdgpu_program_register_sequence() — see gfx_v8_0_init_golden_registers().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
161 
/* Tonga common golden settings: (reg, mask, value) triplets. */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
173 
/*
 * Tonga MGCG/CGCG clock-gating init sequence: (reg, mask, value) triplets.
 * Note mmGRBM_GFX_INDEX is written twice to (re)broadcast before the
 * global CGTT and per-CU CGTS programming phases.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
252 
/* Fiji common golden settings: (reg, mask, value) triplets. */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
266 
/* Fiji A10 golden register settings: (reg, mask, value) triplets. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
281 
/*
 * Fiji MGCG/CGCG clock-gating init sequence: (reg, mask, value) triplets.
 * Unlike Tonga/Iceland/Carrizo this table has no per-CU CGTS entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
320 
/* Iceland (Topaz) A11 golden register settings: (reg, mask, value) triplets. */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
339 
/* Iceland (Topaz) common golden settings: (reg, mask, value) triplets. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
351 
/*
 * Iceland (Topaz) MGCG/CGCG clock-gating init sequence: (reg, mask, value)
 * triplets.  Covers CUs 0-5 only and ends without the CP_MEM_SLP_CNTL
 * entry present in the Tonga/Carrizo tables.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
419 
/* Carrizo A11 golden register settings: (reg, mask, value) triplets. */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
433 
/* Carrizo common golden settings: (reg, mask, value) triplets. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
445 
/*
 * Carrizo MGCG/CGCG clock-gating init sequence: (reg, mask, value) triplets.
 * Covers CUs 0-7; note mmRLC_CGCG_CGLS_CTRL uses 0x0020003f here vs
 * 0x0020003c on Tonga/Fiji/Iceland.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
524 
525 static const u32 stoney_golden_settings_a11[] =
526 {
527 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
528 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
529 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
530 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
531 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
532 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
533   	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
534 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
535 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
536 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
537 };
538 
/* Stoney common golden settings: (reg, mask, value) triplets. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
550 
/* Stoney MGCG/CGCG clock-gating init sequence: (reg, mask, value) triplets. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
560 
/* Forward declarations; definitions live elsewhere in this file (static). */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
564 
565 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
566 {
567 	switch (adev->asic_type) {
568 	case CHIP_TOPAZ:
569 		amdgpu_program_register_sequence(adev,
570 						 iceland_mgcg_cgcg_init,
571 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
572 		amdgpu_program_register_sequence(adev,
573 						 golden_settings_iceland_a11,
574 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
575 		amdgpu_program_register_sequence(adev,
576 						 iceland_golden_common_all,
577 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
578 		break;
579 	case CHIP_FIJI:
580 		amdgpu_program_register_sequence(adev,
581 						 fiji_mgcg_cgcg_init,
582 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
583 		amdgpu_program_register_sequence(adev,
584 						 golden_settings_fiji_a10,
585 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
586 		amdgpu_program_register_sequence(adev,
587 						 fiji_golden_common_all,
588 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
589 		break;
590 
591 	case CHIP_TONGA:
592 		amdgpu_program_register_sequence(adev,
593 						 tonga_mgcg_cgcg_init,
594 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
595 		amdgpu_program_register_sequence(adev,
596 						 golden_settings_tonga_a11,
597 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
598 		amdgpu_program_register_sequence(adev,
599 						 tonga_golden_common_all,
600 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
601 		break;
602 	case CHIP_CARRIZO:
603 		amdgpu_program_register_sequence(adev,
604 						 cz_mgcg_cgcg_init,
605 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
606 		amdgpu_program_register_sequence(adev,
607 						 cz_golden_settings_a11,
608 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
609 		amdgpu_program_register_sequence(adev,
610 						 cz_golden_common_all,
611 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
612 		break;
613 	case CHIP_STONEY:
614 		amdgpu_program_register_sequence(adev,
615 						 stoney_mgcg_cgcg_init,
616 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
617 		amdgpu_program_register_sequence(adev,
618 						 stoney_golden_settings_a11,
619 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
620 		amdgpu_program_register_sequence(adev,
621 						 stoney_golden_common_all,
622 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
623 		break;
624 	default:
625 		break;
626 	}
627 }
628 
629 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
630 {
631 	int i;
632 
633 	adev->gfx.scratch.num_reg = 7;
634 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
635 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
636 		adev->gfx.scratch.free[i] = true;
637 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
638 	}
639 }
640 
/*
 * gfx_v8_0_ring_test_ring - basic sanity test of a CP ring
 * @ring: ring to test
 *
 * Seeds a scratch register with 0xCAFEDEAD, then submits a
 * SET_UCONFIG_REG packet on @ring that writes 0xDEADBEEF to the same
 * register, and polls (up to adev->usec_timeout microseconds) until the
 * new value appears.
 *
 * Returns 0 on success, -EINVAL on timeout, or the error from scratch
 * allocation / ring locking.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the register so the CP's write is detectable. */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_lock(ring, 3);	/* 3 dwords: header + offset + value */
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_unlock_commit(ring);

	/* Busy-poll until the CP has executed the packet or we time out. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
684 
/*
 * gfx_v8_0_ring_test_ib - sanity test of indirect buffer submission
 * @ring: ring to submit the test IB on
 *
 * Seeds a scratch register with 0xCAFEDEAD, builds a 3-dword IB that
 * writes 0xDEADBEEF to it via SET_UCONFIG_REG, submits the IB through
 * the scheduler, waits on the resulting fence, then polls the register
 * for the new value.
 *
 * Returns 0 on success, -EINVAL on timeout, or the first error from
 * scratch/IB allocation, submission or the fence wait.  Cleanup uses
 * the standard goto-unwind pattern.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the register so the IB's write is detectable. */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		goto err1;
	}
	/* 3-dword IB: SET_UCONFIG_REG header, register offset, value. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r)
		goto err2;

	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto err2;
	}
	/* Fence has signalled; poll until the write lands or we time out. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
			 ring->idx, i);
		goto err2;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
745 
746 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
747 {
748 	const char *chip_name;
749 	char fw_name[30];
750 	int err;
751 	struct amdgpu_firmware_info *info = NULL;
752 	const struct common_firmware_header *header = NULL;
753 	const struct gfx_firmware_header_v1_0 *cp_hdr;
754 
755 	DRM_DEBUG("\n");
756 
757 	switch (adev->asic_type) {
758 	case CHIP_TOPAZ:
759 		chip_name = "topaz";
760 		break;
761 	case CHIP_TONGA:
762 		chip_name = "tonga";
763 		break;
764 	case CHIP_CARRIZO:
765 		chip_name = "carrizo";
766 		break;
767 	case CHIP_FIJI:
768 		chip_name = "fiji";
769 		break;
770 	case CHIP_STONEY:
771 		chip_name = "stoney";
772 		break;
773 	default:
774 		BUG();
775 	}
776 
777 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
778 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
779 	if (err)
780 		goto out;
781 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
782 	if (err)
783 		goto out;
784 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
785 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
786 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
787 
788 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
789 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
790 	if (err)
791 		goto out;
792 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
793 	if (err)
794 		goto out;
795 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
796 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
797 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
798 
799 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
800 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
801 	if (err)
802 		goto out;
803 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
804 	if (err)
805 		goto out;
806 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
807 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
808 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
809 
810 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
811 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
812 	if (err)
813 		goto out;
814 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
815 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
816 	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
817 	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
818 
819 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
820 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
821 	if (err)
822 		goto out;
823 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
824 	if (err)
825 		goto out;
826 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
827 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
828 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
829 
830 	if ((adev->asic_type != CHIP_STONEY) &&
831 	    (adev->asic_type != CHIP_TOPAZ)) {
832 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
833 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
834 		if (!err) {
835 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
836 			if (err)
837 				goto out;
838 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
839 				adev->gfx.mec2_fw->data;
840 			adev->gfx.mec2_fw_version =
841 				le32_to_cpu(cp_hdr->header.ucode_version);
842 			adev->gfx.mec2_feature_version =
843 				le32_to_cpu(cp_hdr->ucode_feature_version);
844 		} else {
845 			err = 0;
846 			adev->gfx.mec2_fw = NULL;
847 		}
848 	}
849 
850 	if (adev->firmware.smu_load) {
851 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
852 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
853 		info->fw = adev->gfx.pfp_fw;
854 		header = (const struct common_firmware_header *)info->fw->data;
855 		adev->firmware.fw_size +=
856 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
857 
858 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
859 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
860 		info->fw = adev->gfx.me_fw;
861 		header = (const struct common_firmware_header *)info->fw->data;
862 		adev->firmware.fw_size +=
863 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
864 
865 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
866 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
867 		info->fw = adev->gfx.ce_fw;
868 		header = (const struct common_firmware_header *)info->fw->data;
869 		adev->firmware.fw_size +=
870 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
871 
872 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
873 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
874 		info->fw = adev->gfx.rlc_fw;
875 		header = (const struct common_firmware_header *)info->fw->data;
876 		adev->firmware.fw_size +=
877 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
878 
879 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
880 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
881 		info->fw = adev->gfx.mec_fw;
882 		header = (const struct common_firmware_header *)info->fw->data;
883 		adev->firmware.fw_size +=
884 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
885 
886 		if (adev->gfx.mec2_fw) {
887 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
888 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
889 			info->fw = adev->gfx.mec2_fw;
890 			header = (const struct common_firmware_header *)info->fw->data;
891 			adev->firmware.fw_size +=
892 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
893 		}
894 
895 	}
896 
897 out:
898 	if (err) {
899 		dev_err(adev->dev,
900 			"gfx8: Failed to load firmware \"%s\"\n",
901 			fw_name);
902 		release_firmware(adev->gfx.pfp_fw);
903 		adev->gfx.pfp_fw = NULL;
904 		release_firmware(adev->gfx.me_fw);
905 		adev->gfx.me_fw = NULL;
906 		release_firmware(adev->gfx.ce_fw);
907 		adev->gfx.ce_fw = NULL;
908 		release_firmware(adev->gfx.rlc_fw);
909 		adev->gfx.rlc_fw = NULL;
910 		release_firmware(adev->gfx.mec_fw);
911 		adev->gfx.mec_fw = NULL;
912 		release_firmware(adev->gfx.mec2_fw);
913 		adev->gfx.mec2_fw = NULL;
914 	}
915 	return err;
916 }
917 
918 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
919 {
920 	int r;
921 
922 	if (adev->gfx.mec.hpd_eop_obj) {
923 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
924 		if (unlikely(r != 0))
925 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
926 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
927 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
928 
929 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
930 		adev->gfx.mec.hpd_eop_obj = NULL;
931 	}
932 }
933 
934 #define MEC_HPD_SIZE 2048
935 
936 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
937 {
938 	int r;
939 	u32 *hpd;
940 
941 	/*
942 	 * we assign only 1 pipe because all other pipes will
943 	 * be handled by KFD
944 	 */
945 	adev->gfx.mec.num_mec = 1;
946 	adev->gfx.mec.num_pipe = 1;
947 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
948 
949 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
950 		r = amdgpu_bo_create(adev,
951 				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
952 				     PAGE_SIZE, true,
953 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
954 				     &adev->gfx.mec.hpd_eop_obj);
955 		if (r) {
956 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
957 			return r;
958 		}
959 	}
960 
961 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
962 	if (unlikely(r != 0)) {
963 		gfx_v8_0_mec_fini(adev);
964 		return r;
965 	}
966 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
967 			  &adev->gfx.mec.hpd_eop_gpu_addr);
968 	if (r) {
969 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
970 		gfx_v8_0_mec_fini(adev);
971 		return r;
972 	}
973 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
974 	if (r) {
975 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
976 		gfx_v8_0_mec_fini(adev);
977 		return r;
978 	}
979 
980 	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
981 
982 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
983 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
984 
985 	return 0;
986 }
987 
/* Raw GCN machine code for the VGPR-init compute shader dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() below.  Presumably a sequence of
 * VGPR writes followed by a barrier/end - TODO confirm against the GCN3
 * ISA manual; copied into the workaround IB verbatim, never decoded here.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1024 
/* Raw GCN machine code for the SGPR-init compute shader; dispatched twice
 * by gfx_v8_0_do_edc_gpr_workarounds() (once per SGPR pass, see
 * sgpr1_init_regs/sgpr2_init_regs).  Treated as opaque data here.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1049 
/* Register/value pairs (consumed two at a time) programmed via
 * PACKET3_SET_SH_REG before the VGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(): thread-group geometry, resource
 * limits and the COMPUTE_USER_DATA_* seed values.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1069 
/* Register/value pairs for the first SGPR-init dispatch.  Differs from
 * sgpr2_init_regs only in STATIC_THREAD_MGMT_SE0 (0x0f vs 0xf0), i.e.
 * each pass targets a different set of CUs.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1089 
/* Register/value pairs for the second SGPR-init dispatch; see
 * sgpr1_init_regs - only STATIC_THREAD_MGMT_SE0 differs (0xf0 here).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1109 
/* EDC SEC/DED error-counter registers that
 * gfx_v8_0_do_edc_gpr_workarounds() reads back once at the end of the
 * workaround to clear their counts.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1138 
/**
 * gfx_v8_0_do_edc_gpr_workarounds - touch all GPRs before enabling EDC (CZ)
 * @adev: amdgpu device pointer
 *
 * Carrizo only.  Builds one indirect buffer containing three compute
 * dispatches - one running the VGPR-init shader and two running the
 * SGPR-init shader with different CU masks (sgpr1/sgpr2 reg tables) -
 * submits it on the first compute ring and waits for completion.  EDC
 * is disabled (GB_EDC_MODE = 0) for the duration, then re-enabled with
 * DED_MODE/PROP_FED set, and the SEC/DED counter registers are read
 * back to clear them.  Returns 0 on success or if skipped, negative
 * error code on failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: (regs/2)*3 SET_SH_REG dwords + 4 (PGM_LO/HI write)
	 * + 5 (dispatch packet) + 2 (CS partial flush), times 4 bytes */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED_MODE=2 and PROP_FED set */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): clears DIS_EDC then ORs in bit 0 - presumably
	 * intentional, but worth confirming against the register spec */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);

	return r;
}
1303 
/**
 * gfx_v8_0_gpu_early_init - fill in the per-ASIC gfx configuration
 * @adev: amdgpu device pointer
 *
 * Populates adev->gfx.config (shader engine / tile pipe / CU / backend
 * counts and the SC FIFO sizes) from per-ASIC tables, selecting CU
 * counts by PCI revision on the APUs (Carrizo, Stoney).  Then derives
 * the memory row size - from the DIMM address-map fuses on APUs, or
 * from MC_ARB_RAMCFG on dGPUs - and patches the ROW_SIZE field of the
 * golden GB_ADDR_CONFIG value accordingly.
 */
static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the Carrizo SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			/* NOTE(review): same value as the B8 case above -
			 * presumably intentional, confirm against SKU data */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the Stoney SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
1529 
/**
 * gfx_v8_0_sw_init - gfx IP software initialization
 * @handle: amdgpu_device pointer (as void *)
 *
 * Registers the EOP / privileged-reg / privileged-inst interrupt
 * sources, loads the gfx microcode, allocates the MEC HPD buffer, then
 * initializes every gfx and compute ring, reserves the GDS/GWS/OA
 * buffer objects and runs the per-ASIC early config.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v8_0_gpu_early_init(adev);

	return 0;
}
1641 
1642 static int gfx_v8_0_sw_fini(void *handle)
1643 {
1644 	int i;
1645 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1646 
1647 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1648 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1649 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1650 
1651 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1652 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1653 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1654 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1655 
1656 	gfx_v8_0_mec_fini(adev);
1657 
1658 	return 0;
1659 }
1660 
1661 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1662 {
1663 	uint32_t *modearray, *mod2array;
1664 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1665 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1666 	u32 reg_offset;
1667 
1668 	modearray = adev->gfx.config.tile_mode_array;
1669 	mod2array = adev->gfx.config.macrotile_mode_array;
1670 
1671 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1672 		modearray[reg_offset] = 0;
1673 
1674 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1675 		mod2array[reg_offset] = 0;
1676 
1677 	switch (adev->asic_type) {
1678 	case CHIP_TOPAZ:
1679 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1680 				PIPE_CONFIG(ADDR_SURF_P2) |
1681 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1682 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1683 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1684 				PIPE_CONFIG(ADDR_SURF_P2) |
1685 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1686 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1687 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1688 				PIPE_CONFIG(ADDR_SURF_P2) |
1689 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1690 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1691 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1692 				PIPE_CONFIG(ADDR_SURF_P2) |
1693 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1694 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1695 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1696 				PIPE_CONFIG(ADDR_SURF_P2) |
1697 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1698 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1699 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1700 				PIPE_CONFIG(ADDR_SURF_P2) |
1701 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1702 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1703 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1704 				PIPE_CONFIG(ADDR_SURF_P2) |
1705 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1706 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1707 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1708 				PIPE_CONFIG(ADDR_SURF_P2));
1709 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1710 				PIPE_CONFIG(ADDR_SURF_P2) |
1711 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1712 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1713 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1714 				 PIPE_CONFIG(ADDR_SURF_P2) |
1715 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1716 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1717 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1718 				 PIPE_CONFIG(ADDR_SURF_P2) |
1719 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1720 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1721 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1722 				 PIPE_CONFIG(ADDR_SURF_P2) |
1723 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1724 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1725 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1726 				 PIPE_CONFIG(ADDR_SURF_P2) |
1727 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1728 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1729 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1730 				 PIPE_CONFIG(ADDR_SURF_P2) |
1731 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1732 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1733 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1734 				 PIPE_CONFIG(ADDR_SURF_P2) |
1735 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1736 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1737 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1738 				 PIPE_CONFIG(ADDR_SURF_P2) |
1739 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1740 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1741 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1742 				 PIPE_CONFIG(ADDR_SURF_P2) |
1743 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1744 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1745 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1746 				 PIPE_CONFIG(ADDR_SURF_P2) |
1747 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1748 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1749 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1750 				 PIPE_CONFIG(ADDR_SURF_P2) |
1751 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1752 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1753 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1754 				 PIPE_CONFIG(ADDR_SURF_P2) |
1755 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1756 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1757 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1758 				 PIPE_CONFIG(ADDR_SURF_P2) |
1759 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1760 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1761 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1762 				 PIPE_CONFIG(ADDR_SURF_P2) |
1763 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1764 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1765 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1766 				 PIPE_CONFIG(ADDR_SURF_P2) |
1767 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1768 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1769 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1770 				 PIPE_CONFIG(ADDR_SURF_P2) |
1771 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1772 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1773 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1774 				 PIPE_CONFIG(ADDR_SURF_P2) |
1775 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1776 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1777 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1778 				 PIPE_CONFIG(ADDR_SURF_P2) |
1779 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1780 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1781 
1782 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1783 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1784 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1785 				NUM_BANKS(ADDR_SURF_8_BANK));
1786 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1787 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1788 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789 				NUM_BANKS(ADDR_SURF_8_BANK));
1790 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1791 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1792 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1793 				NUM_BANKS(ADDR_SURF_8_BANK));
1794 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1797 				NUM_BANKS(ADDR_SURF_8_BANK));
1798 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1800 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801 				NUM_BANKS(ADDR_SURF_8_BANK));
1802 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1803 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1804 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1805 				NUM_BANKS(ADDR_SURF_8_BANK));
1806 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1807 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1808 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1809 				NUM_BANKS(ADDR_SURF_8_BANK));
1810 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1812 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813 				NUM_BANKS(ADDR_SURF_16_BANK));
1814 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1815 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1816 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1817 				NUM_BANKS(ADDR_SURF_16_BANK));
1818 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1819 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1820 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1821 				 NUM_BANKS(ADDR_SURF_16_BANK));
1822 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1823 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825 				 NUM_BANKS(ADDR_SURF_16_BANK));
1826 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1827 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1828 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1829 				 NUM_BANKS(ADDR_SURF_16_BANK));
1830 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1831 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1832 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1833 				 NUM_BANKS(ADDR_SURF_16_BANK));
1834 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1837 				 NUM_BANKS(ADDR_SURF_8_BANK));
1838 
1839 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1840 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1841 			    reg_offset != 23)
1842 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1843 
1844 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1845 			if (reg_offset != 7)
1846 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1847 
1848 		break;
1849 	case CHIP_FIJI:
1850 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1851 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1852 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1854 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1855 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1856 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1857 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1858 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1860 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1861 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1862 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1863 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1864 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1865 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1866 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1867 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1869 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1870 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1871 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1872 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1873 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1874 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1875 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1876 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1877 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1878 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1879 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1880 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1881 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1882 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1883 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1884 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1885 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1886 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1887 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1888 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1889 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1890 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1892 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1893 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1894 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1895 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1896 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1897 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1898 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1899 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1900 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1901 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1902 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1903 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1904 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1905 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1906 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1907 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1908 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1909 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1910 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1911 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1912 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1913 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1914 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1915 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1916 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1917 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1918 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1919 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1920 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1921 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1922 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1923 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1924 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1925 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1926 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1927 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1928 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1929 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1930 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1931 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1932 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1933 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1934 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1935 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1936 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1937 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1938 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1939 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1940 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1941 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1942 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1943 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1944 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1945 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1946 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1947 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1948 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1949 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1950 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1951 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1952 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1953 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1954 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1955 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1956 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1957 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1958 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1959 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1960 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1961 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1962 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1963 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1964 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1965 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1966 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1967 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1968 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1969 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1970 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1971 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1972 
1973 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1974 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1975 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1976 				NUM_BANKS(ADDR_SURF_8_BANK));
1977 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1978 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1979 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1980 				NUM_BANKS(ADDR_SURF_8_BANK));
1981 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1982 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1983 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1984 				NUM_BANKS(ADDR_SURF_8_BANK));
1985 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1986 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1987 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1988 				NUM_BANKS(ADDR_SURF_8_BANK));
1989 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1991 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1992 				NUM_BANKS(ADDR_SURF_8_BANK));
1993 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1994 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1995 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1996 				NUM_BANKS(ADDR_SURF_8_BANK));
1997 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1998 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1999 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2000 				NUM_BANKS(ADDR_SURF_8_BANK));
2001 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2002 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2003 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2004 				NUM_BANKS(ADDR_SURF_8_BANK));
2005 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2006 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2007 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2008 				NUM_BANKS(ADDR_SURF_8_BANK));
2009 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2011 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2012 				 NUM_BANKS(ADDR_SURF_8_BANK));
2013 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2014 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2015 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2016 				 NUM_BANKS(ADDR_SURF_8_BANK));
2017 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2018 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2019 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2020 				 NUM_BANKS(ADDR_SURF_8_BANK));
2021 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2022 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2023 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2024 				 NUM_BANKS(ADDR_SURF_8_BANK));
2025 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2026 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2027 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2028 				 NUM_BANKS(ADDR_SURF_4_BANK));
2029 
2030 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2031 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2032 
2033 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2034 			if (reg_offset != 7)
2035 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2036 
2037 		break;
2038 	case CHIP_TONGA:
2039 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2041 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2042 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2043 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2045 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2046 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2047 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2049 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2050 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2051 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2052 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2053 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2054 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2055 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2057 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2058 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2059 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2060 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2061 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2062 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2063 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2065 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2066 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2067 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2069 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2070 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2071 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2072 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2073 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2075 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2079 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2080 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2082 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2083 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2084 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2085 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2086 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2087 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2088 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2089 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2090 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2091 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2094 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2095 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2096 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2097 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2098 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2099 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2100 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2101 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2103 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2105 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2106 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2107 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2108 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2109 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2110 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2111 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2112 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2113 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2114 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2115 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2116 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2117 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2118 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2119 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2120 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2121 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2122 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2123 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2124 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2125 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2126 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2127 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2128 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2129 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2130 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2131 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2132 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2133 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2134 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2135 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2136 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2137 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2138 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2139 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2140 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2141 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2142 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2143 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2144 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2145 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2146 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2147 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2148 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2150 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2151 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2152 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2154 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2155 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2156 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2157 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2159 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2160 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2161 
2162 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2164 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2165 				NUM_BANKS(ADDR_SURF_16_BANK));
2166 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2168 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2169 				NUM_BANKS(ADDR_SURF_16_BANK));
2170 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2172 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2173 				NUM_BANKS(ADDR_SURF_16_BANK));
2174 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2176 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2177 				NUM_BANKS(ADDR_SURF_16_BANK));
2178 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2180 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2181 				NUM_BANKS(ADDR_SURF_16_BANK));
2182 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185 				NUM_BANKS(ADDR_SURF_16_BANK));
2186 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2188 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2189 				NUM_BANKS(ADDR_SURF_16_BANK));
2190 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2192 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2193 				NUM_BANKS(ADDR_SURF_16_BANK));
2194 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2196 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2197 				NUM_BANKS(ADDR_SURF_16_BANK));
2198 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2200 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2201 				 NUM_BANKS(ADDR_SURF_16_BANK));
2202 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2205 				 NUM_BANKS(ADDR_SURF_16_BANK));
2206 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209 				 NUM_BANKS(ADDR_SURF_8_BANK));
2210 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2212 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2213 				 NUM_BANKS(ADDR_SURF_4_BANK));
2214 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2215 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2216 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2217 				 NUM_BANKS(ADDR_SURF_4_BANK));
2218 
2219 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2220 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2221 
2222 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2223 			if (reg_offset != 7)
2224 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2225 
2226 		break;
2227 	case CHIP_STONEY:
2228 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229 				PIPE_CONFIG(ADDR_SURF_P2) |
2230 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2231 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233 				PIPE_CONFIG(ADDR_SURF_P2) |
2234 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2235 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 				PIPE_CONFIG(ADDR_SURF_P2) |
2238 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2239 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241 				PIPE_CONFIG(ADDR_SURF_P2) |
2242 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2243 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245 				PIPE_CONFIG(ADDR_SURF_P2) |
2246 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 				PIPE_CONFIG(ADDR_SURF_P2) |
2250 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2253 				PIPE_CONFIG(ADDR_SURF_P2) |
2254 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2255 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2257 				PIPE_CONFIG(ADDR_SURF_P2));
2258 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2259 				PIPE_CONFIG(ADDR_SURF_P2) |
2260 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2263 				 PIPE_CONFIG(ADDR_SURF_P2) |
2264 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267 				 PIPE_CONFIG(ADDR_SURF_P2) |
2268 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2269 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2271 				 PIPE_CONFIG(ADDR_SURF_P2) |
2272 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2275 				 PIPE_CONFIG(ADDR_SURF_P2) |
2276 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2279 				 PIPE_CONFIG(ADDR_SURF_P2) |
2280 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283 				 PIPE_CONFIG(ADDR_SURF_P2) |
2284 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287 				 PIPE_CONFIG(ADDR_SURF_P2) |
2288 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291 				 PIPE_CONFIG(ADDR_SURF_P2) |
2292 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2295 				 PIPE_CONFIG(ADDR_SURF_P2) |
2296 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2299 				 PIPE_CONFIG(ADDR_SURF_P2) |
2300 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2303 				 PIPE_CONFIG(ADDR_SURF_P2) |
2304 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2307 				 PIPE_CONFIG(ADDR_SURF_P2) |
2308 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2311 				 PIPE_CONFIG(ADDR_SURF_P2) |
2312 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2315 				 PIPE_CONFIG(ADDR_SURF_P2) |
2316 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2319 				 PIPE_CONFIG(ADDR_SURF_P2) |
2320 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 				 PIPE_CONFIG(ADDR_SURF_P2) |
2324 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2327 				 PIPE_CONFIG(ADDR_SURF_P2) |
2328 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2330 
2331 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2334 				NUM_BANKS(ADDR_SURF_8_BANK));
2335 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2337 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2338 				NUM_BANKS(ADDR_SURF_8_BANK));
2339 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2341 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342 				NUM_BANKS(ADDR_SURF_8_BANK));
2343 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346 				NUM_BANKS(ADDR_SURF_8_BANK));
2347 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350 				NUM_BANKS(ADDR_SURF_8_BANK));
2351 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 				NUM_BANKS(ADDR_SURF_8_BANK));
2355 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 				NUM_BANKS(ADDR_SURF_8_BANK));
2359 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2361 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362 				NUM_BANKS(ADDR_SURF_16_BANK));
2363 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2364 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366 				NUM_BANKS(ADDR_SURF_16_BANK));
2367 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2368 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370 				 NUM_BANKS(ADDR_SURF_16_BANK));
2371 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2372 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374 				 NUM_BANKS(ADDR_SURF_16_BANK));
2375 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378 				 NUM_BANKS(ADDR_SURF_16_BANK));
2379 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2382 				 NUM_BANKS(ADDR_SURF_16_BANK));
2383 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386 				 NUM_BANKS(ADDR_SURF_8_BANK));
2387 
2388 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2389 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2390 			    reg_offset != 23)
2391 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2392 
2393 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2394 			if (reg_offset != 7)
2395 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2396 
2397 		break;
2398 	default:
2399 		dev_warn(adev->dev,
2400 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2401 			 adev->asic_type);
2402 
2403 	case CHIP_CARRIZO:
2404 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405 				PIPE_CONFIG(ADDR_SURF_P2) |
2406 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2407 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 				PIPE_CONFIG(ADDR_SURF_P2) |
2410 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2411 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413 				PIPE_CONFIG(ADDR_SURF_P2) |
2414 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2415 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 				PIPE_CONFIG(ADDR_SURF_P2) |
2418 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2419 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 				PIPE_CONFIG(ADDR_SURF_P2) |
2422 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425 				PIPE_CONFIG(ADDR_SURF_P2) |
2426 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429 				PIPE_CONFIG(ADDR_SURF_P2) |
2430 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2431 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2433 				PIPE_CONFIG(ADDR_SURF_P2));
2434 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435 				PIPE_CONFIG(ADDR_SURF_P2) |
2436 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 				 PIPE_CONFIG(ADDR_SURF_P2) |
2440 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443 				 PIPE_CONFIG(ADDR_SURF_P2) |
2444 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2445 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447 				 PIPE_CONFIG(ADDR_SURF_P2) |
2448 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2449 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2450 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 				 PIPE_CONFIG(ADDR_SURF_P2) |
2452 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2455 				 PIPE_CONFIG(ADDR_SURF_P2) |
2456 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459 				 PIPE_CONFIG(ADDR_SURF_P2) |
2460 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2463 				 PIPE_CONFIG(ADDR_SURF_P2) |
2464 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2465 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2466 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2467 				 PIPE_CONFIG(ADDR_SURF_P2) |
2468 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2469 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2470 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2471 				 PIPE_CONFIG(ADDR_SURF_P2) |
2472 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2473 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2475 				 PIPE_CONFIG(ADDR_SURF_P2) |
2476 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2479 				 PIPE_CONFIG(ADDR_SURF_P2) |
2480 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2481 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2483 				 PIPE_CONFIG(ADDR_SURF_P2) |
2484 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2487 				 PIPE_CONFIG(ADDR_SURF_P2) |
2488 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2491 				 PIPE_CONFIG(ADDR_SURF_P2) |
2492 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2495 				 PIPE_CONFIG(ADDR_SURF_P2) |
2496 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2497 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499 				 PIPE_CONFIG(ADDR_SURF_P2) |
2500 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2501 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2503 				 PIPE_CONFIG(ADDR_SURF_P2) |
2504 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2505 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2506 
2507 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510 				NUM_BANKS(ADDR_SURF_8_BANK));
2511 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2513 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514 				NUM_BANKS(ADDR_SURF_8_BANK));
2515 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518 				NUM_BANKS(ADDR_SURF_8_BANK));
2519 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522 				NUM_BANKS(ADDR_SURF_8_BANK));
2523 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526 				NUM_BANKS(ADDR_SURF_8_BANK));
2527 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2530 				NUM_BANKS(ADDR_SURF_8_BANK));
2531 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2534 				NUM_BANKS(ADDR_SURF_8_BANK));
2535 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2536 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2537 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538 				NUM_BANKS(ADDR_SURF_16_BANK));
2539 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2540 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542 				NUM_BANKS(ADDR_SURF_16_BANK));
2543 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2544 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546 				 NUM_BANKS(ADDR_SURF_16_BANK));
2547 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2548 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2550 				 NUM_BANKS(ADDR_SURF_16_BANK));
2551 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554 				 NUM_BANKS(ADDR_SURF_16_BANK));
2555 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2558 				 NUM_BANKS(ADDR_SURF_16_BANK));
2559 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2562 				 NUM_BANKS(ADDR_SURF_8_BANK));
2563 
2564 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2565 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2566 			    reg_offset != 23)
2567 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2568 
2569 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2570 			if (reg_offset != 7)
2571 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2572 
2573 		break;
2574 	}
2575 }
2576 
2577 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2578 {
2579 	return (u32)((1ULL << bit_width) - 1);
2580 }
2581 
2582 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2583 {
2584 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2585 
2586 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2587 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2588 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2589 	} else if (se_num == 0xffffffff) {
2590 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2591 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2592 	} else if (sh_num == 0xffffffff) {
2593 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2594 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2595 	} else {
2596 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2597 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2598 	}
2599 	WREG32(mmGRBM_GFX_INDEX, data);
2600 }
2601 
2602 static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
2603 				    u32 max_rb_num_per_se,
2604 				    u32 sh_per_se)
2605 {
2606 	u32 data, mask;
2607 
2608 	data = RREG32(mmCC_RB_BACKEND_DISABLE);
2609 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2610 
2611 	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2612 
2613 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2614 
2615 	mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
2616 
2617 	return data & mask;
2618 }
2619 
/*
 * Discover which render backends are usable and program the per-SE
 * rasterizer configuration accordingly.  The enabled-RB mask is also
 * cached in adev->gfx.config.backend_enable_mask for later consumers.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
			      u32 se_num, u32 sh_per_se,
			      u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather the disabled-RB bits of every SE/SH into one packed
	 * bitmap, RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			data = gfx_v8_0_get_rb_disabled(adev,
					      max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) *
						 RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast indexing before dropping the lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Invert the bitmap: set bits now mean "RB enabled". */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	adev->gfx.config.backend_enable_mask = enabled_rbs;

	/* Program PA_SC_RASTER_CONFIG per SE; each SH consumes the low
	 * two bits of enabled_rbs to pick an RB mapping. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		data = RREG32(mmPA_SC_RASTER_CONFIG);
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RB in this pair: steer the packer map */
				if (j == 0)
					data |= (RASTER_CONFIG_RB_MAP_3 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				else
					data |= (RASTER_CONFIG_RB_MAP_0 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				break;
			case 1:
				/* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 <<
					 (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(mmPA_SC_RASTER_CONFIG, data);
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
2686 
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMID apertures
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base for both the shared and private apertures */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access, cache-coherent default
	 * mtype, private ATC enabled */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* apply the config to every compute VMID (8..15) via SRBM banking */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* back to VMID 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2731 
/*
 * One-time GFX engine bring-up: GRBM timeout, address-config mirrors,
 * tiling tables, render-backend setup, per-VMID SH_MEM apertures for
 * graphics (VMIDs 0-15), compute VMID apertures and SC FIFO sizing.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	/* extend the GRBM register read timeout */
	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* mirror the address config into every block that decodes tiling
	 * (HDP, DMIF, SDMA, UVD) */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
				 adev->gfx.config.max_sh_per_se,
				 adev->gfx.config.max_backends_per_se);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel): uncached default mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* user VMIDs: non-coherent default mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
2806 
/*
 * Poll until the RLC serdes masters go idle: first the per-CU masters
 * of every SE/SH, then the non-CU (SE/GC/TC) masters.  Each poll is
 * bounded by adev->usec_timeout; on timeout the function simply falls
 * through without reporting an error.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* wait for the per-CU masters of each shader array */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* then wait for the global (non-CU) serdes masters */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
2836 
2837 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2838 					       bool enable)
2839 {
2840 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2841 
2842 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2843 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2844 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2845 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2846 
2847 	WREG32(mmCP_INT_CNTL_RING0, tmp);
2848 }
2849 
/*
 * Halt the RLC: disable its F32 core, mask the GUI idle interrupts,
 * then wait for the serdes masters to drain before returning.
 */
void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32(mmRLC_CNTL, tmp);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	gfx_v8_0_wait_for_rlc_serdes(adev);
}
2861 
/*
 * Pulse the RLC soft-reset bit in GRBM_SOFT_RESET, holding each state
 * for 50us so the reset is latched before releasing it.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmGRBM_SOFT_RESET);

	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
}
2873 
/*
 * Re-enable the RLC F32 core.  On discrete parts the GUI idle interrupt
 * is re-armed here; APUs defer that until the CP is initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
	WREG32(mmRLC_CNTL, tmp);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	/* give the RLC time to come up */
	udelay(50);
}
2887 
/*
 * Upload the RLC (GPM) microcode dword-by-dword through the
 * RLC_GPM_UCODE_ADDR/DATA pair.  The firmware version is written to the
 * address register after the upload, matching the established sequence.
 *
 * Returns 0 on success, -EINVAL if the RLC firmware was never loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* ucode payload starts at the offset recorded in the header */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
2911 
/*
 * Full RLC restart: stop it, clear clock/power gating state, soft-reset
 * it, (re)load its microcode unless the SMU already did, then start it.
 *
 * Returns 0 on success or a negative error code if the microcode could
 * not be loaded / was not loaded by the SMU.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU-managed loading: just confirm it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
2944 
2945 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2946 {
2947 	int i;
2948 	u32 tmp = RREG32(mmCP_ME_CNTL);
2949 
2950 	if (enable) {
2951 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2952 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2953 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2954 	} else {
2955 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2956 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2957 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2958 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2959 			adev->gfx.gfx_ring[i].ready = false;
2960 	}
2961 	WREG32(mmCP_ME_CNTL, tmp);
2962 	udelay(50);
2963 }
2964 
2965 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2966 {
2967 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2968 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2969 	const struct gfx_firmware_header_v1_0 *me_hdr;
2970 	const __le32 *fw_data;
2971 	unsigned i, fw_size;
2972 
2973 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2974 		return -EINVAL;
2975 
2976 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2977 		adev->gfx.pfp_fw->data;
2978 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2979 		adev->gfx.ce_fw->data;
2980 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2981 		adev->gfx.me_fw->data;
2982 
2983 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2984 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2985 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2986 
2987 	gfx_v8_0_cp_gfx_enable(adev, false);
2988 
2989 	/* PFP */
2990 	fw_data = (const __le32 *)
2991 		(adev->gfx.pfp_fw->data +
2992 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2993 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2994 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
2995 	for (i = 0; i < fw_size; i++)
2996 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2997 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2998 
2999 	/* CE */
3000 	fw_data = (const __le32 *)
3001 		(adev->gfx.ce_fw->data +
3002 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3003 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3004 	WREG32(mmCP_CE_UCODE_ADDR, 0);
3005 	for (i = 0; i < fw_size; i++)
3006 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3007 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3008 
3009 	/* ME */
3010 	fw_data = (const __le32 *)
3011 		(adev->gfx.me_fw->data +
3012 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3013 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3014 	WREG32(mmCP_ME_RAM_WADDR, 0);
3015 	for (i = 0; i < fw_size; i++)
3016 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3017 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3018 
3019 	return 0;
3020 }
3021 
3022 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3023 {
3024 	u32 count = 0;
3025 	const struct cs_section_def *sect = NULL;
3026 	const struct cs_extent_def *ext = NULL;
3027 
3028 	/* begin clear state */
3029 	count += 2;
3030 	/* context control state */
3031 	count += 3;
3032 
3033 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3034 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3035 			if (sect->id == SECT_CONTEXT)
3036 				count += 2 + ext->reg_count;
3037 			else
3038 				return 0;
3039 		}
3040 	}
3041 	/* pa_sc_raster_config/pa_sc_raster_config1 */
3042 	count += 4;
3043 	/* end clear state */
3044 	count += 2;
3045 	/* clear state */
3046 	count += 2;
3047 
3048 	return count;
3049 }
3050 
/*
 * Initialize the CP and emit the clear-state packet stream on gfx
 * ring 0: preamble, context control, the vi_cs_data context registers,
 * per-ASIC PA_SC_RASTER_CONFIG values, CLEAR_STATE and the CE partition
 * bases.  Returns 0 on success or the ring-lock error code.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords beyond the csb size for the SET_BASE packet below */
	r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_unlock_commit(ring);

	return 0;
}
3133 
/*
 * gfx_v8_0_cp_gfx_resume - program and start the gfx ring buffer (RB0).
 *
 * Sets up CP_RB0 size/control, read/write pointers, the rptr writeback
 * address, the ring base address and (except on Topaz) the gfx doorbell,
 * then starts the ring via gfx_v8_0_cp_gfx_start() and ring-tests it.
 *
 * Returns 0 on success or the ring-test error; on failure the ring is
 * marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size (register encodes log2 of qwords) */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is set temporarily so the rptr can be reset too.
	 */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* restore normal control after the pointer reset settles */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* Tonga additionally fences the doorbell range for gfx */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
3214 
3215 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3216 {
3217 	int i;
3218 
3219 	if (enable) {
3220 		WREG32(mmCP_MEC_CNTL, 0);
3221 	} else {
3222 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3223 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3224 			adev->gfx.compute_ring[i].ready = false;
3225 	}
3226 	udelay(50);
3227 }
3228 
/*
 * gfx_v8_0_cp_compute_start - un-halt the compute micro engines.
 *
 * Thin wrapper kept for symmetry with the gfx start path; always
 * returns 0.
 */
static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
{
	gfx_v8_0_cp_compute_enable(adev, true);

	return 0;
}
3235 
/*
 * gfx_v8_0_cp_compute_load_microcode - upload MEC firmware by direct
 * register writes (legacy, non-SMU loading path).
 *
 * Halts the compute engines, streams the MEC1 image dword-by-dword
 * through CP_MEC_ME1_UCODE_DATA, and finally writes the firmware
 * version to the ADDR register.  MEC2 is loaded the same way only when
 * a separate mec2 image was fetched.
 *
 * Returns 0 on success, -EINVAL if no MEC firmware is available.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* engines must be halted while ucode is replaced */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
3281 
/*
 * struct vi_mqd - Memory Queue Descriptor for VI-family compute queues.
 *
 * 256 dwords of architected state read/written by the CP, followed by a
 * 256-dword scratch area reserved for the ucode.  Fields from ordinal128
 * onward mirror the CP_MQD_*/CP_HQD_* registers that
 * gfx_v8_0_cp_compute_resume() programs; the layout (including every
 * reserved slot) is fixed by the hardware/firmware interface, so fields
 * must not be reordered, renamed in a way that changes offsets, added or
 * removed.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
3542 
3543 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3544 {
3545 	int i, r;
3546 
3547 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3548 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3549 
3550 		if (ring->mqd_obj) {
3551 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
3552 			if (unlikely(r != 0))
3553 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3554 
3555 			amdgpu_bo_unpin(ring->mqd_obj);
3556 			amdgpu_bo_unreserve(ring->mqd_obj);
3557 
3558 			amdgpu_bo_unref(&ring->mqd_obj);
3559 			ring->mqd_obj = NULL;
3560 		}
3561 	}
3562 }
3563 
/*
 * gfx_v8_0_cp_compute_resume - bring up all compute queues.
 *
 * First programs the EOP buffer/size for every MEC pipe under the SRBM
 * lock, then for each compute ring: allocates (if needed), pins and maps
 * an MQD BO, fills the MQD, disables the queue if active, and programs
 * the CP_HQD_* registers from the MQD before activating the queue.
 * Finally enables the global doorbell, starts the MECs and ring-tests
 * each compute ring.
 *
 * Returns 0 on success or a negative error code; on MQD setup failure
 * all previously created MQDs are torn down via
 * gfx_v8_0_cp_compute_fini().
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;	/* always true today; kept for the !doorbell path */
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 belong to ME1, the rest to ME2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;	/* register takes a 256-byte-aligned address */

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* create the MQD BO lazily; it survives suspend/resume */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on every shader engine */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* all HQD register accesses below go to this ring's queue */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait (up to usec_timeout) for the HQD to drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* APUs and Fiji also need the MEC doorbell aperture set */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* globally enable compute doorbells */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	r = gfx_v8_0_cp_compute_start(adev);
	if (r)
		return r;

	/* ring-test each queue; a failing queue is just marked not ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
3820 
/*
 * gfx_v8_0_cp_resume - load CP firmware (if needed) and resume gfx and
 * compute rings.
 *
 * When powerplay is disabled, firmware is loaded either directly
 * (legacy path) or is verified to have been loaded by the SMU via
 * check_fw_load_finish(); Topaz has no SMU-loaded MEC, so its MEC is
 * always loaded directly.  Afterwards both CP ring types are resumed
 * and the GUI idle interrupt is re-enabled.
 *
 * Returns 0 on success, -EINVAL if an SMU firmware check fails, or the
 * error from the load/resume helpers.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* dGPUs only: quiesce the GUI idle interrupt during bring-up */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just confirm each image landed */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			/* Topaz MEC is not SMU-loaded; fall back to direct load */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
										 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
3879 
/* Enable or disable both the gfx and compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
3885 
/*
 * gfx_v8_0_hw_init - IP-block hw_init callback.
 *
 * Applies golden register settings, initializes the GPU core, then
 * brings up the RLC and the CP in that order.  Returns 0 on success or
 * the first error encountered.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CP is resumed */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
3905 
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini callback.
 *
 * Disables the privileged register/instruction interrupts, halts both
 * CPs, stops the RLC, and frees the compute MQD buffers.  Always
 * returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	return 0;
}
3918 
/* Suspend is just a full hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
3925 
/* Resume re-runs the full hardware init sequence. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
3932 
3933 static bool gfx_v8_0_is_idle(void *handle)
3934 {
3935 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3936 
3937 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3938 		return false;
3939 	else
3940 		return true;
3941 }
3942 
3943 static int gfx_v8_0_wait_for_idle(void *handle)
3944 {
3945 	unsigned i;
3946 	u32 tmp;
3947 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3948 
3949 	for (i = 0; i < adev->usec_timeout; i++) {
3950 		/* read MC_STATUS */
3951 		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3952 
3953 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3954 			return 0;
3955 		udelay(1);
3956 	}
3957 	return -ETIMEDOUT;
3958 }
3959 
/*
 * gfx_v8_0_print_status - dump GFX 8.x register state to the kernel log
 *
 * Debug helper (called around soft reset) that prints GRBM/CP status,
 * tiling configuration, address configs, CP gfx-ring and MEC state, RLC
 * state and the per-VMID SH_MEM setup. Read-only apart from the SE/SH
 * and SRBM selector registers, both of which are restored (broadcast /
 * VMID 0) before returning.
 */
static void gfx_v8_0_print_status(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	dev_info(adev->dev, "GFX 8.x registers\n");
	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
		 RREG32(mmGRBM_STATUS));
	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
		 RREG32(mmGRBM_STATUS2));
	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE0));
	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE1));
	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE2));
	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE3));
	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT1));
	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT2));
	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT3));
	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(mmCP_CPF_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPF_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPC_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));

	/* tiling configuration */
	for (i = 0; i < 32; i++) {
		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < 16; i++) {
		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
	}
	/* per-SE raster config; selector restored to broadcast afterwards */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		dev_info(adev->dev, "  se: %d\n", i);
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG));
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG_1));
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	/* address configuration shared with HDP/DCE/SDMA/UVD */
	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmGB_ADDR_CONFIG));
	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmHDP_ADDR_CONFIG));
	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
		 RREG32(mmDMIF_ADDR_CALC));
	dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
	dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));

	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
		 RREG32(mmCP_MEQ_THRESHOLDS));
	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
		 RREG32(mmSX_DEBUG_1));
	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
		 RREG32(mmTA_CNTL_AUX));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL));
	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
		 RREG32(mmSQ_CONFIG));
	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
		 RREG32(mmDB_DEBUG));
	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
		 RREG32(mmDB_DEBUG2));
	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
		 RREG32(mmDB_DEBUG3));
	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
		 RREG32(mmCB_HW_CONTROL));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL_1));
	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
		 RREG32(mmPA_SC_FIFO_SIZE));
	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
		 RREG32(mmVGT_NUM_INSTANCES));
	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
		 RREG32(mmCP_PERFMON_CNTL));
	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
		 RREG32(mmVGT_CACHE_INVALIDATION));
	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
		 RREG32(mmVGT_GS_VERTEX_REUSE));
	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
		 RREG32(mmPA_CL_ENHANCE));
	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
		 RREG32(mmPA_SC_ENHANCE));

	/* CP gfx engine state */
	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
		 RREG32(mmCP_ME_CNTL));
	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
		 RREG32(mmCP_MAX_CONTEXT));
	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
		 RREG32(mmCP_ENDIAN_SWAP));
	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
		 RREG32(mmCP_DEVICE_ID));

	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
		 RREG32(mmCP_SEM_WAIT_TIMER));

	/* gfx ring buffer 0 state */
	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
		 RREG32(mmCP_RB_WPTR_DELAY));
	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
		 RREG32(mmCP_RB_VMID));
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
		 RREG32(mmCP_RB0_WPTR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
		 RREG32(mmCP_RB0_BASE));
	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
		 RREG32(mmCP_RB0_BASE_HI));
	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
		 RREG32(mmCP_MEC_CNTL));
	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
		 RREG32(mmCP_CPF_DEBUG));

	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
		 RREG32(mmSCRATCH_ADDR));
	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
		 RREG32(mmSCRATCH_UMSK));

	/* RLC state */
	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
		 RREG32(mmCP_INT_CNTL_RING0));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
		 RREG32(mmRLC_CNTL));
	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
		 RREG32(mmRLC_CGCG_CGLS_CTRL));
	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_INIT));
	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_MAX));
	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
		 RREG32(mmRLC_LB_INIT_CU_MASK));
	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
		 RREG32(mmRLC_LB_PARAMS));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
		 RREG32(mmRLC_MC_CNTL));
	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
		 RREG32(mmRLC_UCODE_CNTL));

	/* per-VMID SH_MEM state; srbm_mutex guards the SRBM selector */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		dev_info(adev->dev, "  VM %d:\n", i);
		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
			 RREG32(mmSH_MEM_CONFIG));
		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_BASE));
		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_LIMIT));
		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
			 RREG32(mmSH_MEM_BASES));
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
4148 
4149 static int gfx_v8_0_soft_reset(void *handle)
4150 {
4151 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4152 	u32 tmp;
4153 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4154 
4155 	/* GRBM_STATUS */
4156 	tmp = RREG32(mmGRBM_STATUS);
4157 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4158 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4159 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4160 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4161 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4162 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4163 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4164 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4165 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4166 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4167 	}
4168 
4169 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4170 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4171 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4172 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4173 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4174 	}
4175 
4176 	/* GRBM_STATUS2 */
4177 	tmp = RREG32(mmGRBM_STATUS2);
4178 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4179 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4180 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4181 
4182 	/* SRBM_STATUS */
4183 	tmp = RREG32(mmSRBM_STATUS);
4184 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4185 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4186 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4187 
4188 	if (grbm_soft_reset || srbm_soft_reset) {
4189 		gfx_v8_0_print_status((void *)adev);
4190 		/* stop the rlc */
4191 		gfx_v8_0_rlc_stop(adev);
4192 
4193 		/* Disable GFX parsing/prefetching */
4194 		gfx_v8_0_cp_gfx_enable(adev, false);
4195 
4196 		/* Disable MEC parsing/prefetching */
4197 		gfx_v8_0_cp_compute_enable(adev, false);
4198 
4199 		if (grbm_soft_reset || srbm_soft_reset) {
4200 			tmp = RREG32(mmGMCON_DEBUG);
4201 			tmp = REG_SET_FIELD(tmp,
4202 					    GMCON_DEBUG, GFX_STALL, 1);
4203 			tmp = REG_SET_FIELD(tmp,
4204 					    GMCON_DEBUG, GFX_CLEAR, 1);
4205 			WREG32(mmGMCON_DEBUG, tmp);
4206 
4207 			udelay(50);
4208 		}
4209 
4210 		if (grbm_soft_reset) {
4211 			tmp = RREG32(mmGRBM_SOFT_RESET);
4212 			tmp |= grbm_soft_reset;
4213 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4214 			WREG32(mmGRBM_SOFT_RESET, tmp);
4215 			tmp = RREG32(mmGRBM_SOFT_RESET);
4216 
4217 			udelay(50);
4218 
4219 			tmp &= ~grbm_soft_reset;
4220 			WREG32(mmGRBM_SOFT_RESET, tmp);
4221 			tmp = RREG32(mmGRBM_SOFT_RESET);
4222 		}
4223 
4224 		if (srbm_soft_reset) {
4225 			tmp = RREG32(mmSRBM_SOFT_RESET);
4226 			tmp |= srbm_soft_reset;
4227 			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4228 			WREG32(mmSRBM_SOFT_RESET, tmp);
4229 			tmp = RREG32(mmSRBM_SOFT_RESET);
4230 
4231 			udelay(50);
4232 
4233 			tmp &= ~srbm_soft_reset;
4234 			WREG32(mmSRBM_SOFT_RESET, tmp);
4235 			tmp = RREG32(mmSRBM_SOFT_RESET);
4236 		}
4237 
4238 		if (grbm_soft_reset || srbm_soft_reset) {
4239 			tmp = RREG32(mmGMCON_DEBUG);
4240 			tmp = REG_SET_FIELD(tmp,
4241 					    GMCON_DEBUG, GFX_STALL, 0);
4242 			tmp = REG_SET_FIELD(tmp,
4243 					    GMCON_DEBUG, GFX_CLEAR, 0);
4244 			WREG32(mmGMCON_DEBUG, tmp);
4245 		}
4246 
4247 		/* Wait a little for things to settle down */
4248 		udelay(50);
4249 		gfx_v8_0_print_status((void *)adev);
4250 	}
4251 	return 0;
4252 }
4253 
4254 /**
4255  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4256  *
4257  * @adev: amdgpu_device pointer
4258  *
4259  * Fetches a GPU clock counter snapshot.
4260  * Returns the 64 bit clock counter snapshot.
4261  */
4262 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4263 {
4264 	uint64_t clock;
4265 
4266 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4267 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4268 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4269 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4270 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4271 	return clock;
4272 }
4273 
4274 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4275 					  uint32_t vmid,
4276 					  uint32_t gds_base, uint32_t gds_size,
4277 					  uint32_t gws_base, uint32_t gws_size,
4278 					  uint32_t oa_base, uint32_t oa_size)
4279 {
4280 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4281 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4282 
4283 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4284 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4285 
4286 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
4287 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
4288 
4289 	/* GDS Base */
4290 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4291 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4292 				WRITE_DATA_DST_SEL(0)));
4293 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4294 	amdgpu_ring_write(ring, 0);
4295 	amdgpu_ring_write(ring, gds_base);
4296 
4297 	/* GDS Size */
4298 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4299 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4300 				WRITE_DATA_DST_SEL(0)));
4301 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4302 	amdgpu_ring_write(ring, 0);
4303 	amdgpu_ring_write(ring, gds_size);
4304 
4305 	/* GWS */
4306 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4307 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4308 				WRITE_DATA_DST_SEL(0)));
4309 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4310 	amdgpu_ring_write(ring, 0);
4311 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4312 
4313 	/* OA */
4314 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4315 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4316 				WRITE_DATA_DST_SEL(0)));
4317 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4318 	amdgpu_ring_write(ring, 0);
4319 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4320 }
4321 
/* Set the ring counts and install the ring, IRQ and GDS function tables
 * before any hardware is touched. Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);

	return 0;
}
4334 
4335 static int gfx_v8_0_late_init(void *handle)
4336 {
4337 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4338 	int r;
4339 
4340 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4341 	if (r)
4342 		return r;
4343 
4344 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4345 	if (r)
4346 		return r;
4347 
4348 	/* requires IBs so do in late init after IB pool is initialized */
4349 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4350 	if (r)
4351 		return r;
4352 
4353 	return 0;
4354 }
4355 
/* Power gating is not implemented for GFX 8; accept any state as a no-op. */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
4361 
/*
 * fiji_send_serdes_cmd - issue a BPM serdes command to all CUs
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: BPM register address to target
 * @cmd: BPM command data to send
 *
 * Selects all SEs/SHs and all CU/non-CU serdes masters, then programs
 * RLC_SERDES_WR_CTRL with the command, register address and a broadcast
 * BPM address (0xff).
 */
static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
		uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every SE/SH */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	/* clear every command/select/address field before setting new ones */
	data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
			(cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
			(reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
			(0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
4391 
/*
 * fiji_update_medium_grain_clock_gating - toggle MGCG/MGLS on Fiji
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable medium grain clock gating and light sleep
 *
 * Follows the numbered enable/disable ordering in the body: RLC and CP
 * memory light sleep, the RLC MGCG override, serdes idle waits, and the
 * CGTS (tree shade) controls. Registers are only written when the value
 * actually changes.
 */
static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
		bool enable)
{
	uint32_t temp, data;

	/* It is disabled by HW by default */
	if (enable) {
		/* 1 - RLC memory Light sleep */
		temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
		data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmRLC_MEM_SLP_CNTL, data);

		/* 2 - CP memory Light sleep */
		temp = data = RREG32(mmCP_MEM_SLP_CNTL);
		data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmCP_MEM_SLP_CNTL, data);

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
		data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
		data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
		data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
		data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
		data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
		data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}
}
4485 
/*
 * fiji_update_coarse_grain_clock_gating - toggle CGCG/CGLS on Fiji
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable coarse grain clock gating (and CGLS)
 *
 * Enable and disable follow opposite orderings of MGCG-override
 * updates, serdes BPM commands and the RLC_CGCG_CGLS_CTRL write, per
 * the numbered steps in the body.
 */
static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
		bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		/* enable cgls*/
		data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}
}
4566 static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4567 		bool enable)
4568 {
4569 	if (enable) {
4570 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4571 		 * ===  MGCG + MGLS + TS(CG/LS) ===
4572 		 */
4573 		fiji_update_medium_grain_clock_gating(adev, enable);
4574 		fiji_update_coarse_grain_clock_gating(adev, enable);
4575 	} else {
4576 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4577 		 * ===  CGCG + CGLS ===
4578 		 */
4579 		fiji_update_coarse_grain_clock_gating(adev, enable);
4580 		fiji_update_medium_grain_clock_gating(adev, enable);
4581 	}
4582 	return 0;
4583 }
4584 
4585 static int gfx_v8_0_set_clockgating_state(void *handle,
4586 					  enum amd_clockgating_state state)
4587 {
4588 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4589 
4590 	switch (adev->asic_type) {
4591 	case CHIP_FIJI:
4592 		fiji_update_gfx_clock_gating(adev,
4593 				state == AMD_CG_STATE_GATE ? true : false);
4594 		break;
4595 	default:
4596 		break;
4597 	}
4598 	return 0;
4599 }
4600 
4601 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4602 {
4603 	u32 rptr;
4604 
4605 	rptr = ring->adev->wb.wb[ring->rptr_offs];
4606 
4607 	return rptr;
4608 }
4609 
4610 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4611 {
4612 	struct amdgpu_device *adev = ring->adev;
4613 	u32 wptr;
4614 
4615 	if (ring->use_doorbell)
4616 		/* XXX check if swapping is necessary on BE */
4617 		wptr = ring->adev->wb.wb[ring->wptr_offs];
4618 	else
4619 		wptr = RREG32(mmCP_RB0_WPTR);
4620 
4621 	return wptr;
4622 }
4623 
4624 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4625 {
4626 	struct amdgpu_device *adev = ring->adev;
4627 
4628 	if (ring->use_doorbell) {
4629 		/* XXX check if swapping is necessary on BE */
4630 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
4631 		WDOORBELL32(ring->doorbell_index, ring->wptr);
4632 	} else {
4633 		WREG32(mmCP_RB0_WPTR, ring->wptr);
4634 		(void)RREG32(mmCP_RB0_WPTR);
4635 	}
4636 }
4637 
4638 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4639 {
4640 	u32 ref_and_mask, reg_mem_engine;
4641 
4642 	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
4643 		switch (ring->me) {
4644 		case 1:
4645 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4646 			break;
4647 		case 2:
4648 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4649 			break;
4650 		default:
4651 			return;
4652 		}
4653 		reg_mem_engine = 0;
4654 	} else {
4655 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4656 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4657 	}
4658 
4659 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4660 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4661 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
4662 				 reg_mem_engine));
4663 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4664 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4665 	amdgpu_ring_write(ring, ref_and_mask);
4666 	amdgpu_ring_write(ring, ref_and_mask);
4667 	amdgpu_ring_write(ring, 0x20); /* poll interval */
4668 }
4669 
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring
 *
 * Writes the ring's next_rptr to its writeback slot, optionally emits a
 * SWITCH_BUFFER for a context switch, then emits the INDIRECT_BUFFER
 * packet (the _CONST variant for CE IBs). A CE preamble IB is dropped
 * entirely when the context did not change.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	/* 5 dwords: the WRITE_DATA packet emitted below */
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	/* 2 extra dwords for the SWITCH_BUFFER packet */
	if (need_ctx_switch)
		next_rptr += 2;

	/* 4 extra dwords for the INDIRECT_BUFFER packet */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VM id (if any) in bits 24+ */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4714 
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 *
 * Writes the ring's next_rptr to its writeback slot, then emits the
 * INDIRECT_BUFFER packet with the IB address, length and VM id.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	/* 5 dwords: the WRITE_DATA packet emitted below */
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* 4 extra dwords for the INDIRECT_BUFFER packet */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VM id (if any) in bits 24+ */
	control |= ib->length_dw |
			   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4744 
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 *
 * Emits an EVENT_WRITE_EOP packet that flushes the TC/TCL1 caches and
 * writes the fence @seq (64 or 32 bit, per AMDGPU_FENCE_FLAG_64BIT) to
 * @addr, optionally raising an interrupt (AMDGPU_FENCE_FLAG_INT).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
4764 
4765 /**
4766  * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
4767  *
4768  * @ring: amdgpu ring buffer object
4769  * @semaphore: amdgpu semaphore object
4770  * @emit_wait: Is this a sempahore wait?
4771  *
4772  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4773  * from running ahead of semaphore waits.
4774  */
4775 static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
4776 					 struct amdgpu_semaphore *semaphore,
4777 					 bool emit_wait)
4778 {
4779 	uint64_t addr = semaphore->gpu_addr;
4780 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4781 
4782 	if (ring->adev->asic_type == CHIP_TOPAZ ||
4783 	    ring->adev->asic_type == CHIP_TONGA ||
4784 	    ring->adev->asic_type == CHIP_FIJI)
4785 		/* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */
4786 		return false;
4787 	else {
4788 		amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
4789 		amdgpu_ring_write(ring, lower_32_bits(addr));
4790 		amdgpu_ring_write(ring, upper_32_bits(addr));
4791 		amdgpu_ring_write(ring, sel);
4792 	}
4793 
4794 	if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
4795 		/* Prevent the PFP from running ahead of the semaphore wait */
4796 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4797 		amdgpu_ring_write(ring, 0x0);
4798 	}
4799 
4800 	return true;
4801 }
4802 
/*
 * gfx_v8_0_ring_emit_vm_flush - emit a VM page-table update and TLB flush
 *
 * Waits for the ring's last synced fence, writes the new page directory
 * base for @vm_id, requests a TLB invalidate for that VM context and
 * polls until the invalidate request clears.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait until the fence memory equals the last synced sequence */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
		 WAIT_REG_MEM_FUNCTION(3))); /* equal */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetch CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* write the new page-directory base for this VM context */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4872 
4873 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4874 {
4875 	return ring->adev->wb.wb[ring->rptr_offs];
4876 }
4877 
4878 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4879 {
4880 	return ring->adev->wb.wb[ring->wptr_offs];
4881 }
4882 
/*
 * gfx_v8_0_ring_set_wptr_compute - publish the compute ring write pointer
 *
 * Mirrors the new write pointer into the write-back slot, then rings the
 * queue's doorbell so the CP picks up the new work.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	/* local 'adev' is also referenced implicitly by WDOORBELL32 */
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4891 
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 *
 * Emits a RELEASE_MEM packet (the compute equivalent of EVENT_WRITE_EOP)
 * that flushes/writes back the TC/TCL1 caches and then writes @seq to
 * @addr; @flags selects 32- vs 64-bit writes and interrupt delivery.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL: 2 = 64-bit seq write, 1 = 32-bit; INT_SEL: 2 = int + ack */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
4912 
4913 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4914 						 enum amdgpu_interrupt_state state)
4915 {
4916 	u32 cp_int_cntl;
4917 
4918 	switch (state) {
4919 	case AMDGPU_IRQ_STATE_DISABLE:
4920 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4921 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4922 					    TIME_STAMP_INT_ENABLE, 0);
4923 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4924 		break;
4925 	case AMDGPU_IRQ_STATE_ENABLE:
4926 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4927 		cp_int_cntl =
4928 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4929 				      TIME_STAMP_INT_ENABLE, 1);
4930 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4931 		break;
4932 	default:
4933 		break;
4934 	}
4935 }
4936 
4937 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4938 						     int me, int pipe,
4939 						     enum amdgpu_interrupt_state state)
4940 {
4941 	u32 mec_int_cntl, mec_int_cntl_reg;
4942 
4943 	/*
4944 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
4945 	 * handles the setting of interrupts for this specific pipe. All other
4946 	 * pipes' interrupts are set by amdkfd.
4947 	 */
4948 
4949 	if (me == 1) {
4950 		switch (pipe) {
4951 		case 0:
4952 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4953 			break;
4954 		default:
4955 			DRM_DEBUG("invalid pipe %d\n", pipe);
4956 			return;
4957 		}
4958 	} else {
4959 		DRM_DEBUG("invalid me %d\n", me);
4960 		return;
4961 	}
4962 
4963 	switch (state) {
4964 	case AMDGPU_IRQ_STATE_DISABLE:
4965 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4966 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4967 					     TIME_STAMP_INT_ENABLE, 0);
4968 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4969 		break;
4970 	case AMDGPU_IRQ_STATE_ENABLE:
4971 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4972 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4973 					     TIME_STAMP_INT_ENABLE, 1);
4974 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4975 		break;
4976 	default:
4977 		break;
4978 	}
4979 }
4980 
4981 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4982 					     struct amdgpu_irq_src *source,
4983 					     unsigned type,
4984 					     enum amdgpu_interrupt_state state)
4985 {
4986 	u32 cp_int_cntl;
4987 
4988 	switch (state) {
4989 	case AMDGPU_IRQ_STATE_DISABLE:
4990 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4991 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4992 					    PRIV_REG_INT_ENABLE, 0);
4993 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4994 		break;
4995 	case AMDGPU_IRQ_STATE_ENABLE:
4996 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4997 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4998 					    PRIV_REG_INT_ENABLE, 1);
4999 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5000 		break;
5001 	default:
5002 		break;
5003 	}
5004 
5005 	return 0;
5006 }
5007 
5008 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5009 					      struct amdgpu_irq_src *source,
5010 					      unsigned type,
5011 					      enum amdgpu_interrupt_state state)
5012 {
5013 	u32 cp_int_cntl;
5014 
5015 	switch (state) {
5016 	case AMDGPU_IRQ_STATE_DISABLE:
5017 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5018 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5019 					    PRIV_INSTR_INT_ENABLE, 0);
5020 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5021 		break;
5022 	case AMDGPU_IRQ_STATE_ENABLE:
5023 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5024 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5025 					    PRIV_INSTR_INT_ENABLE, 1);
5026 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5027 		break;
5028 	default:
5029 		break;
5030 	}
5031 
5032 	return 0;
5033 }
5034 
5035 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5036 					    struct amdgpu_irq_src *src,
5037 					    unsigned type,
5038 					    enum amdgpu_interrupt_state state)
5039 {
5040 	switch (type) {
5041 	case AMDGPU_CP_IRQ_GFX_EOP:
5042 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
5043 		break;
5044 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5045 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5046 		break;
5047 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5048 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5049 		break;
5050 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5051 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5052 		break;
5053 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5054 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5055 		break;
5056 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5057 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5058 		break;
5059 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5060 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5061 		break;
5062 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5063 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5064 		break;
5065 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5066 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5067 		break;
5068 	default:
5069 		break;
5070 	}
5071 	return 0;
5072 }
5073 
5074 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
5075 			    struct amdgpu_irq_src *source,
5076 			    struct amdgpu_iv_entry *entry)
5077 {
5078 	int i;
5079 	u8 me_id, pipe_id, queue_id;
5080 	struct amdgpu_ring *ring;
5081 
5082 	DRM_DEBUG("IH: CP EOP\n");
5083 	me_id = (entry->ring_id & 0x0c) >> 2;
5084 	pipe_id = (entry->ring_id & 0x03) >> 0;
5085 	queue_id = (entry->ring_id & 0x70) >> 4;
5086 
5087 	switch (me_id) {
5088 	case 0:
5089 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5090 		break;
5091 	case 1:
5092 	case 2:
5093 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5094 			ring = &adev->gfx.compute_ring[i];
5095 			/* Per-queue interrupt is supported for MEC starting from VI.
5096 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5097 			  */
5098 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5099 				amdgpu_fence_process(ring);
5100 		}
5101 		break;
5102 	}
5103 	return 0;
5104 }
5105 
/*
 * gfx_v8_0_priv_reg_irq - handle a privileged-register access fault
 *
 * A submitted command stream touched a privileged register; log it and
 * queue a GPU reset.  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5114 
/*
 * gfx_v8_0_priv_inst_irq - handle an illegal-instruction fault
 *
 * A submitted command stream contained an illegal instruction; log it and
 * queue a GPU reset.  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5123 
/* IP-block callbacks for the GFX8 engine: lifecycle, idle/reset and
 * clock/power gating hooks invoked by the amdgpu IP framework. */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.print_status = gfx_v8_0_print_status,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
5140 
/* Ring callbacks for the graphics (GFX) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_semaphore = gfx_v8_0_ring_emit_semaphore,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
};
5156 
/* Ring callbacks for the compute (MEC) rings; pointers come from
 * write-back memory and fences use RELEASE_MEM instead of EOP. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_semaphore = gfx_v8_0_ring_emit_semaphore,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
};
5172 
5173 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5174 {
5175 	int i;
5176 
5177 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5178 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5179 
5180 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5181 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5182 }
5183 
/* End-of-pipe interrupt source: state control and IV processing. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
5188 
/* Privileged-register fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
5193 
/* Illegal-instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
5198 
/* Register the GFX8 interrupt sources with the amdgpu irq framework. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one type per CP EOP irq (gfx ring + each MEC pipe) */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
5210 
/*
 * gfx_v8_0_set_gds_init - initialize the GDS (global data share) sizes
 *
 * Reads the total GDS memory size from the hardware and picks gfx/CS
 * partition sizes for GDS memory, GWS and OA depending on whether the
 * part exposes 64KB of GDS or not.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
5238 
5239 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
5240 		u32 se, u32 sh)
5241 {
5242 	u32 mask = 0, tmp, tmp1;
5243 	int i;
5244 
5245 	gfx_v8_0_select_se_sh(adev, se, sh);
5246 	tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5247 	tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5248 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5249 
5250 	tmp &= 0xffff0000;
5251 
5252 	tmp |= tmp1;
5253 	tmp >>= 16;
5254 
5255 	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
5256 		mask <<= 1;
5257 		mask |= 1;
5258 	}
5259 
5260 	return (~tmp) & mask;
5261 }
5262 
/*
 * gfx_v8_0_get_cu_info - gather the compute-unit configuration
 *
 * Fills @cu_info with the per-SE/SH active-CU bitmaps, the total number
 * of active CUs, and an "always on" CU mask containing up to two CUs per
 * shader array.  Returns 0 on success, -EINVAL on a NULL argument.
 */
int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
						 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;

	if (!adev || !cu_info)
		return -EINVAL;

	/* serializes the SE/SH selection done in gfx_v8_0_get_cu_active_bitmap() */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					/* the first two active CUs per SH are "always on" */
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* NOTE(review): the shift amount i * 16 + j * 8 reaches 32
			 * or more once i >= 2, which overflows a 32-bit shift —
			 * presumably benign for the SE/SH layouts this code runs
			 * on, but verify for parts with more than two SEs. */
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	mutex_unlock(&adev->grbm_idx_mutex);
	return 0;
}
5299