xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 3dc4b6fb)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "vi.h"
33 #include "vi_structs.h"
34 #include "vid.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
39 
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
42 
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
45 
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
51 
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
54 
55 #include "smu/smu_7_1_3_d.h"
56 
57 #include "ivsrcid/ivsrcid_vislands30.h"
58 
/*
 * Driver-local GFX8 constants.
 *
 * The *_GB_ADDR_CONFIG_GOLDEN literals are the same values that appear in the
 * mmGB_ADDR_CONFIG rows of the *_golden_common_all tables in this file
 * (0x22010001 in the iceland and cz tables, 0x22011002 in the polaris11
 * table, 0x22011003 in the tonga table).
 * NOTE(review): the unit of GFX8_MEC_HPD_SIZE (presumably bytes) is not
 * visible in this section - confirm at the use site.
 */
59 #define GFX8_NUM_GFX_RINGS     1
60 #define GFX8_MEC_HPD_SIZE 4096
61 
62 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
63 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
65 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
66 
/*
 * Field-placement helpers: each macro left-shifts its argument by the matching
 * GB_TILE_MODE0__*__SHIFT or GB_MACROTILE_MODE0__*__SHIFT constant.
 * The argument is parenthesized but the result is not masked, so an
 * out-of-range argument would spill into neighbouring bits.
 * NOTE(review): the __SHIFT constants are not defined in this section;
 * presumably they come from one of the gca/gfx_8_0_* headers - confirm.
 */
67 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
68 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
69 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
70 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
71 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
72 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
73 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
74 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
75 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
76 
/*
 * Single-bit masks covering bits 0 through 5, one per name suffix
 * (CPF, RLC, MGCG, CGCG, CGLS, GRBM).
 * NOTE(review): going by the prefix these are fields of the
 * mmRLC_CGTT_MGCG_OVERRIDE register written by the *_mgcg_cgcg_init tables -
 * confirm against the register header.
 */
77 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
78 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
79 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
80 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
82 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
83 
84 /* BPM SERDES command values: 1 for the SET command, 0 for the CLE command (CLE presumably abbreviates "clear" - TODO confirm at the use sites) */
85 #define SET_BPM_SERDES_CMD    1
86 #define CLE_BPM_SERDES_CMD    0
87 
88 /* BPM register addresses: consecutive values starting at 0 */
89 enum {
90 	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
91 	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
92 	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
93 	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
94 	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
95 	BPM_REG_FGCG_MAX            /* Evaluates to 5, the number of entries above */
96 };
97 
/*
 * NOTE(review): no use of this constant is visible in this section; the name
 * suggests a length (in entries) of an RLC "direct register list" format -
 * confirm at the use site before relying on that reading.
 */
98 #define RLC_FormatDirectRegListLength        14
99 
/*
 * Firmware image names declared with MODULE_FIRMWARE(), grouped per ASIC:
 * carrizo, stoney, tonga, topaz, fiji, polaris10/11/12 and vegam.
 *
 * - Every group lists ce, pfp, me, mec and rlc images.
 * - stoney and topaz list no mec2 image; all other groups do.
 * - The polaris10/11/12 groups additionally list a "_2" variant of the
 *   ce, pfp, me, mec and mec2 images (but not of rlc).
 *
 * NOTE(review): which variant is actually requested at runtime is decided by
 * code outside this section.
 */
100 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
106 
107 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
119 
120 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
132 
133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
144 
145 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
156 
157 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
168 
169 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
175 
/*
 * Per-VMID GDS register table: 16 rows, one for each of VMID0..VMID15.
 * Each row lists, in this order, the mmGDS_VMIDn_BASE, mmGDS_VMIDn_SIZE,
 * mmGDS_GWS_VMIDn and mmGDS_OA_VMIDn register for that VMID.
 * NOTE(review): the initializers are positional and struct
 * amdgpu_gds_reg_offset is declared elsewhere - confirm the member order
 * matches (base, size, gws, oa).
 */
176 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
177 {
178 	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
179 	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
180 	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
181 	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
182 	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
183 	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
184 	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
185 	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
186 	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
187 	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
188 	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
189 	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
190 	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
191 	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
192 	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
193 	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
194 };
195 
/*
 * Tonga golden register settings.
 * Flat u32 array laid out three values per line: a register offset macro
 * (mm*) followed by two 32-bit constants.
 * NOTE(review): presumably (register, mask, value) triples consumed by
 * amdgpu_device_program_register_sequence(); the Tonga call site is outside
 * this section - confirm there.
 */
196 static const u32 golden_settings_tonga_a11[] =
197 {
198 	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
199 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
200 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
201 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
202 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
203 	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
204 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
205 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
206 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
207 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
208 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
209 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
210 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
211 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
212 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
213 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
214 };
215 
/*
 * Tonga common golden settings, three u32 values per line (register offset
 * macro, then two constants; every second-column value here is 0xffffffff).
 * The list opens with an mmGRBM_GFX_INDEX row, and its mmGB_ADDR_CONFIG value
 * (0x22011003) equals TONGA_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): presumably (register, mask, value) triples - confirm against
 * amdgpu_device_program_register_sequence().
 */
216 static const u32 tonga_golden_common_all[] =
217 {
218 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
219 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
220 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
221 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
222 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
223 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
224 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
225 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
226 };
227 
/*
 * Tonga MGCG/CGCG init list, three u32 values per line (register offset
 * macro, then two constants).
 * Layout: mmRLC_CGTT_MGCG_OVERRIDE first, an mmGRBM_GFX_INDEX row, the
 * CGTT/CGTT-like clock control registers, a second mmGRBM_GFX_INDEX row, the
 * per-CU mmCGTS_CU0..CU7 registers, then mmCGTS_SM_CTRL_REG,
 * mmCP_RB_WPTR_POLL_CNTL, mmRLC_CGCG_CGLS_CTRL and mmCP_MEM_SLP_CNTL.
 * NOTE(review): presumably (register, mask, value) triples applied in array
 * order - confirm against amdgpu_device_program_register_sequence().
 */
228 static const u32 tonga_mgcg_cgcg_init[] =
229 {
230 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
231 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
232 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
233 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
234 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
235 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
236 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
237 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
238 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
239 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
240 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
241 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
242 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
243 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
244 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
245 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
246 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
247 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
248 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
249 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
250 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
251 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
252 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
253 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
254 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
255 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
256 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
257 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
258 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
260 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
264 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
269 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
274 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
279 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
284 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
289 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291 	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292 	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293 	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
294 	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295 	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296 	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
297 	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
298 	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
299 	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
300 	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
301 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
302 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
303 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
304 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
305 };
306 
/*
 * VegaM golden register settings, three u32 values per line (register offset
 * macro, then two constants).
 * NOTE(review): presumably (register, mask, value) triples consumed by
 * amdgpu_device_program_register_sequence(); the VegaM call site is outside
 * this section - confirm there.
 */
307 static const u32 golden_settings_vegam_a11[] =
308 {
309 	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
310 	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
311 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
312 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
313 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
314 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
315 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
316 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
317 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
318 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
319 	mmSQ_CONFIG, 0x07f80000, 0x01180000,
320 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
321 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
322 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
323 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
324 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
325 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
326 };
327 
/*
 * VegaM common golden settings, three u32 values per line (register offset
 * macro, then two constants; every second-column value here is 0xffffffff).
 * Unlike the tonga/polaris10/fiji common tables, this one has no
 * mmPA_SC_RASTER_CONFIG rows; those registers appear in
 * golden_settings_vegam_a11 instead.
 * NOTE(review): presumably (register, mask, value) triples - confirm against
 * amdgpu_device_program_register_sequence().
 */
328 static const u32 vegam_golden_common_all[] =
329 {
330 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
332 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
333 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
334 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
335 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
336 };
337 
/*
 * Polaris11 golden register settings, three u32 values per line (register
 * offset macro, then two constants).
 * NOTE(review): presumably (register, mask, value) triples consumed by
 * amdgpu_device_program_register_sequence(); the Polaris11 call site is
 * outside this section - confirm there.
 */
338 static const u32 golden_settings_polaris11_a11[] =
339 {
340 	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
341 	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
342 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
343 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
344 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
345 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
346 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
347 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
348 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
350 	mmSQ_CONFIG, 0x07f80000, 0x01180000,
351 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
354 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
355 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
356 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
357 };
358 
/*
 * Polaris11 common golden settings, three u32 values per line (register
 * offset macro, then two constants; every second-column value here is
 * 0xffffffff).
 * The mmGB_ADDR_CONFIG value (0x22011002) equals
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): presumably (register, mask, value) triples - confirm against
 * amdgpu_device_program_register_sequence().
 */
359 static const u32 polaris11_golden_common_all[] =
360 {
361 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
363 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
364 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
365 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
366 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
367 };
368 
/*
 * Polaris10 golden register settings, three u32 values per line (register
 * offset macro, then two constants).
 * Compared with the polaris11 and vegam tables, this one adds an
 * mmATC_MISC_CG row and has no mmTCP_CHAN_STEER_LO row.
 * NOTE(review): presumably (register, mask, value) triples consumed by
 * amdgpu_device_program_register_sequence(); the Polaris10 call site is
 * outside this section - confirm there.
 */
369 static const u32 golden_settings_polaris10_a11[] =
370 {
371 	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
372 	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
373 	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
374 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
375 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
376 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
377 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
378 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
379 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
380 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
381 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
382 	mmSQ_CONFIG, 0x07f80000, 0x07180000,
383 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
384 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
385 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
386 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
387 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
388 };
389 
/*
 * Polaris10 common golden settings, three u32 values per line (register
 * offset macro, then two constants; every second-column value here is
 * 0xffffffff).
 * The register/value rows are the same as in tonga_golden_common_all.
 * NOTE(review): presumably (register, mask, value) triples - confirm against
 * amdgpu_device_program_register_sequence().
 */
390 static const u32 polaris10_golden_common_all[] =
391 {
392 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
393 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
394 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
395 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
396 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
397 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
398 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
399 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
400 };
401 
/*
 * Fiji common golden settings, three u32 values per line (register offset
 * macro, then two constants).
 * mmGRBM_GFX_INDEX appears twice: once at the start and once more directly
 * before the final mmSPI_CONFIG_CNTL_1 row.
 * NOTE(review): presumably (register, mask, value) triples applied in array
 * order - confirm against amdgpu_device_program_register_sequence().
 */
402 static const u32 fiji_golden_common_all[] =
403 {
404 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
405 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
406 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
407 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
408 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
409 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
410 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
411 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
412 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
413 	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
414 };
415 
/*
 * Fiji golden register settings, three u32 values per line (register offset
 * macro, then two constants).
 * gfx_v8_0_init_golden_registers() passes this array, with its ARRAY_SIZE(),
 * to amdgpu_device_program_register_sequence() in the CHIP_FIJI case, right
 * after fiji_mgcg_cgcg_init.
 * NOTE(review): presumably (register, mask, value) triples - confirm against
 * that helper.
 */
416 static const u32 golden_settings_fiji_a10[] =
417 {
418 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
419 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
420 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
421 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
422 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
423 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
428 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
429 };
430 
/*
 * Fiji MGCG/CGCG init list, three u32 values per line (register offset
 * macro, then two constants).
 * This is the first array gfx_v8_0_init_golden_registers() hands to
 * amdgpu_device_program_register_sequence() in the CHIP_FIJI case.
 * Unlike the tonga/iceland/cz variants, it contains no per-CU mmCGTS_CUn_*
 * rows: the second mmGRBM_GFX_INDEX row is followed directly by
 * mmCGTS_SM_CTRL_REG.
 * NOTE(review): presumably (register, mask, value) triples applied in array
 * order - confirm against that helper.
 */
431 static const u32 fiji_mgcg_cgcg_init[] =
432 {
433 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
434 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
436 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
437 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
438 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
439 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
440 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
441 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
442 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
443 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
444 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
445 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
446 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
447 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
448 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
449 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
450 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
451 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
452 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
453 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
454 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
455 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
456 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
457 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
458 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
459 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
460 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
461 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
463 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
464 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
465 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
466 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
467 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
468 };
469 
/*
 * Iceland golden register settings, three u32 values per line (register
 * offset macro, then two constants).
 * gfx_v8_0_init_golden_registers() passes this array, with its ARRAY_SIZE(),
 * to amdgpu_device_program_register_sequence() in the CHIP_TOPAZ case,
 * between iceland_mgcg_cgcg_init and iceland_golden_common_all.
 * NOTE(review): presumably (register, mask, value) triples - confirm against
 * that helper.
 */
470 static const u32 golden_settings_iceland_a11[] =
471 {
472 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
473 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
474 	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
475 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
476 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
477 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
478 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
479 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
480 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
481 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
482 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
483 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
484 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
485 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
486 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
487 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
488 };
489 
/*
 * Iceland common golden settings, three u32 values per line (register offset
 * macro, then two constants; every second-column value here is 0xffffffff).
 * This is the third and last array gfx_v8_0_init_golden_registers() hands to
 * amdgpu_device_program_register_sequence() in the CHIP_TOPAZ case.
 * The mmGB_ADDR_CONFIG value (0x22010001) equals TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): presumably (register, mask, value) triples - confirm against
 * that helper.
 */
490 static const u32 iceland_golden_common_all[] =
491 {
492 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
493 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
494 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
495 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
496 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
497 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
498 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
499 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
500 };
501 
/*
 * Iceland MGCG/CGCG init list, three u32 values per line (register offset
 * macro, then two constants).
 * This is the first array gfx_v8_0_init_golden_registers() hands to
 * amdgpu_device_program_register_sequence() in the CHIP_TOPAZ case.
 * Differences visible against the tonga/cz variants: per-CU rows cover only
 * CU0..CU5, several values differ (e.g. the CP/CPC/CPF clock controls use
 * 0xc0000100 and the CU0/CU4 TA_SQC rows use 0x0f840f87), and there is no
 * trailing mmCP_MEM_SLP_CNTL row.
 * NOTE(review): presumably (register, mask, value) triples applied in array
 * order - confirm against that helper.
 */
502 static const u32 iceland_mgcg_cgcg_init[] =
503 {
504 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
505 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
506 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
507 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
508 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
509 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
510 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
511 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
512 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
513 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
514 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
515 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
516 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
517 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
518 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
519 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
520 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
521 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
522 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
523 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
524 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
525 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
526 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
527 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
528 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
529 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
530 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
531 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
534 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
538 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
543 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
548 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
553 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
558 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
563 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
566 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
567 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
568 };
569 
/*
 * Carrizo (cz) golden register settings, three u32 values per line (register
 * offset macro, then two constants).
 * NOTE(review): presumably (register, mask, value) triples consumed by
 * amdgpu_device_program_register_sequence(); the Carrizo call site is outside
 * this section - confirm there.
 * NOTE(review): in the mmTCP_ADDR_CONFIG row the second column (0x0000000f)
 * is narrower than the bits set in the third (0x000000f3); if the second
 * column is a mask, check how the helper treats value bits outside it.
 */
570 static const u32 cz_golden_settings_a11[] =
571 {
572 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
573 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
574 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
575 	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
576 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
577 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
578 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
579 	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
580 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
581 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
582 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
583 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
584 };
585 
/*
 * Carrizo (cz) common golden settings, three u32 values per line (register
 * offset macro, then two constants; every second-column value here is
 * 0xffffffff).
 * The mmGB_ADDR_CONFIG value (0x22010001) equals
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN; the rows match iceland_golden_common_all.
 * NOTE(review): presumably (register, mask, value) triples - confirm against
 * amdgpu_device_program_register_sequence().
 */
586 static const u32 cz_golden_common_all[] =
587 {
588 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
589 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
590 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
591 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
592 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
593 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
594 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
595 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
596 };
597 
/*
 * Carrizo (cz) MGCG/CGCG init list, three u32 values per line (register
 * offset macro, then two constants).
 * Same register sequence as tonga_mgcg_cgcg_init (per-CU rows for CU0..CU7);
 * the values that differ are mmCGTT_CPF_CLK_CTRL (0x00000100 here) and
 * mmRLC_CGCG_CGLS_CTRL (0x0020003f here).
 * NOTE(review): presumably (register, mask, value) triples applied in array
 * order - confirm against amdgpu_device_program_register_sequence().
 */
598 static const u32 cz_mgcg_cgcg_init[] =
599 {
600 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
601 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
602 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
603 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
604 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
605 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
606 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
607 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
608 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
609 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
610 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
611 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
612 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
613 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
614 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
615 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
616 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
617 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
618 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
619 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
620 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
621 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
622 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
623 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
624 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
625 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
626 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
627 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
628 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
630 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
631 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
634 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
639 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
644 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
649 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
654 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
659 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661 	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662 	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663 	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
664 	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665 	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666 	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
667 	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
668 	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
669 	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
670 	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
671 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
672 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
673 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
674 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
675 };
676 
/*
 * Stoney golden register settings, three u32 values per line (register offset
 * macro, then two constants).
 * NOTE(review): presumably (register, mask, value) triples consumed by
 * amdgpu_device_program_register_sequence(); the Stoney call site is outside
 * this section - confirm there.
 * NOTE(review): in the mmTCP_ADDR_CONFIG row the second column (0x0000000f)
 * is narrower than the bits set in the third (0x000000f1); if the second
 * column is a mask, check how the helper treats value bits outside it.
 */
677 static const u32 stoney_golden_settings_a11[] =
678 {
679 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
680 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
681 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
682 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
683 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
684 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
685 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
686 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
687 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
688 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
689 };
690 
/*
 * Stoney common golden settings, three u32 values per line (register offset
 * macro, then two constants; every second-column value here is 0xffffffff).
 * The mmGB_ADDR_CONFIG value here is 0x12010001, which differs from all the
 * *_GB_ADDR_CONFIG_GOLDEN macros defined at the top of this file.
 * NOTE(review): presumably (register, mask, value) triples - confirm against
 * amdgpu_device_program_register_sequence().
 */
691 static const u32 stoney_golden_common_all[] =
692 {
693 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
694 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
695 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
696 	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
697 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
698 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
699 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
700 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
701 };
702 
/*
 * Stoney MGCG/CGCG init list, three u32 values per line (register offset
 * macro, then two constants; every second-column value here is 0xffffffff).
 * Much shorter than the other *_mgcg_cgcg_init tables: five rows, with no
 * mmRLC_CGTT_MGCG_OVERRIDE row, no CGTT clock-control rows and no per-CU rows.
 * NOTE(review): presumably (register, mask, value) triples applied in array
 * order - confirm against amdgpu_device_program_register_sequence().
 */
703 static const u32 stoney_mgcg_cgcg_init[] =
704 {
705 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
706 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
707 	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
708 	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
709 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
710 };
711 
712 
/*
 * Human-readable descriptions of SQ EDC error sources, one string per
 * source index (0..6).  The consumer is not visible in this part of the
 * file; presumably indexed by a SOURCE field of SQ_EDC_INFO -- confirm.
 */
static const char * const sq_edc_source_names[] = {
	[0] = "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	[1] = "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	[2] = "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	[3] = "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	[4] = "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	[5] = "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	[6] = "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
722 
723 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
724 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
727 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
728 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
729 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
730 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
731 
732 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
733 {
734 	switch (adev->asic_type) {
735 	case CHIP_TOPAZ:
736 		amdgpu_device_program_register_sequence(adev,
737 							iceland_mgcg_cgcg_init,
738 							ARRAY_SIZE(iceland_mgcg_cgcg_init));
739 		amdgpu_device_program_register_sequence(adev,
740 							golden_settings_iceland_a11,
741 							ARRAY_SIZE(golden_settings_iceland_a11));
742 		amdgpu_device_program_register_sequence(adev,
743 							iceland_golden_common_all,
744 							ARRAY_SIZE(iceland_golden_common_all));
745 		break;
746 	case CHIP_FIJI:
747 		amdgpu_device_program_register_sequence(adev,
748 							fiji_mgcg_cgcg_init,
749 							ARRAY_SIZE(fiji_mgcg_cgcg_init));
750 		amdgpu_device_program_register_sequence(adev,
751 							golden_settings_fiji_a10,
752 							ARRAY_SIZE(golden_settings_fiji_a10));
753 		amdgpu_device_program_register_sequence(adev,
754 							fiji_golden_common_all,
755 							ARRAY_SIZE(fiji_golden_common_all));
756 		break;
757 
758 	case CHIP_TONGA:
759 		amdgpu_device_program_register_sequence(adev,
760 							tonga_mgcg_cgcg_init,
761 							ARRAY_SIZE(tonga_mgcg_cgcg_init));
762 		amdgpu_device_program_register_sequence(adev,
763 							golden_settings_tonga_a11,
764 							ARRAY_SIZE(golden_settings_tonga_a11));
765 		amdgpu_device_program_register_sequence(adev,
766 							tonga_golden_common_all,
767 							ARRAY_SIZE(tonga_golden_common_all));
768 		break;
769 	case CHIP_VEGAM:
770 		amdgpu_device_program_register_sequence(adev,
771 							golden_settings_vegam_a11,
772 							ARRAY_SIZE(golden_settings_vegam_a11));
773 		amdgpu_device_program_register_sequence(adev,
774 							vegam_golden_common_all,
775 							ARRAY_SIZE(vegam_golden_common_all));
776 		break;
777 	case CHIP_POLARIS11:
778 	case CHIP_POLARIS12:
779 		amdgpu_device_program_register_sequence(adev,
780 							golden_settings_polaris11_a11,
781 							ARRAY_SIZE(golden_settings_polaris11_a11));
782 		amdgpu_device_program_register_sequence(adev,
783 							polaris11_golden_common_all,
784 							ARRAY_SIZE(polaris11_golden_common_all));
785 		break;
786 	case CHIP_POLARIS10:
787 		amdgpu_device_program_register_sequence(adev,
788 							golden_settings_polaris10_a11,
789 							ARRAY_SIZE(golden_settings_polaris10_a11));
790 		amdgpu_device_program_register_sequence(adev,
791 							polaris10_golden_common_all,
792 							ARRAY_SIZE(polaris10_golden_common_all));
793 		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
794 		if (adev->pdev->revision == 0xc7 &&
795 		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
796 		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
797 		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
798 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
799 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
800 		}
801 		break;
802 	case CHIP_CARRIZO:
803 		amdgpu_device_program_register_sequence(adev,
804 							cz_mgcg_cgcg_init,
805 							ARRAY_SIZE(cz_mgcg_cgcg_init));
806 		amdgpu_device_program_register_sequence(adev,
807 							cz_golden_settings_a11,
808 							ARRAY_SIZE(cz_golden_settings_a11));
809 		amdgpu_device_program_register_sequence(adev,
810 							cz_golden_common_all,
811 							ARRAY_SIZE(cz_golden_common_all));
812 		break;
813 	case CHIP_STONEY:
814 		amdgpu_device_program_register_sequence(adev,
815 							stoney_mgcg_cgcg_init,
816 							ARRAY_SIZE(stoney_mgcg_cgcg_init));
817 		amdgpu_device_program_register_sequence(adev,
818 							stoney_golden_settings_a11,
819 							ARRAY_SIZE(stoney_golden_settings_a11));
820 		amdgpu_device_program_register_sequence(adev,
821 							stoney_golden_common_all,
822 							ARRAY_SIZE(stoney_golden_common_all));
823 		break;
824 	default:
825 		break;
826 	}
827 }
828 
829 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
830 {
831 	adev->gfx.scratch.num_reg = 8;
832 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
833 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
834 }
835 
836 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
837 {
838 	struct amdgpu_device *adev = ring->adev;
839 	uint32_t scratch;
840 	uint32_t tmp = 0;
841 	unsigned i;
842 	int r;
843 
844 	r = amdgpu_gfx_scratch_get(adev, &scratch);
845 	if (r)
846 		return r;
847 
848 	WREG32(scratch, 0xCAFEDEAD);
849 	r = amdgpu_ring_alloc(ring, 3);
850 	if (r)
851 		goto error_free_scratch;
852 
853 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
854 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
855 	amdgpu_ring_write(ring, 0xDEADBEEF);
856 	amdgpu_ring_commit(ring);
857 
858 	for (i = 0; i < adev->usec_timeout; i++) {
859 		tmp = RREG32(scratch);
860 		if (tmp == 0xDEADBEEF)
861 			break;
862 		udelay(1);
863 	}
864 
865 	if (i >= adev->usec_timeout)
866 		r = -ETIMEDOUT;
867 
868 error_free_scratch:
869 	amdgpu_gfx_scratch_free(adev, scratch);
870 	return r;
871 }
872 
873 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
874 {
875 	struct amdgpu_device *adev = ring->adev;
876 	struct amdgpu_ib ib;
877 	struct dma_fence *f = NULL;
878 
879 	unsigned int index;
880 	uint64_t gpu_addr;
881 	uint32_t tmp;
882 	long r;
883 
884 	r = amdgpu_device_wb_get(adev, &index);
885 	if (r)
886 		return r;
887 
888 	gpu_addr = adev->wb.gpu_addr + (index * 4);
889 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
890 	memset(&ib, 0, sizeof(ib));
891 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
892 	if (r)
893 		goto err1;
894 
895 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
896 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
897 	ib.ptr[2] = lower_32_bits(gpu_addr);
898 	ib.ptr[3] = upper_32_bits(gpu_addr);
899 	ib.ptr[4] = 0xDEADBEEF;
900 	ib.length_dw = 5;
901 
902 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
903 	if (r)
904 		goto err2;
905 
906 	r = dma_fence_wait_timeout(f, false, timeout);
907 	if (r == 0) {
908 		r = -ETIMEDOUT;
909 		goto err2;
910 	} else if (r < 0) {
911 		goto err2;
912 	}
913 
914 	tmp = adev->wb.wb[index];
915 	if (tmp == 0xDEADBEEF)
916 		r = 0;
917 	else
918 		r = -EINVAL;
919 
920 err2:
921 	amdgpu_ib_free(adev, &ib, NULL);
922 	dma_fence_put(f);
923 err1:
924 	amdgpu_device_wb_free(adev, index);
925 	return r;
926 }
927 
928 
929 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
930 {
931 	release_firmware(adev->gfx.pfp_fw);
932 	adev->gfx.pfp_fw = NULL;
933 	release_firmware(adev->gfx.me_fw);
934 	adev->gfx.me_fw = NULL;
935 	release_firmware(adev->gfx.ce_fw);
936 	adev->gfx.ce_fw = NULL;
937 	release_firmware(adev->gfx.rlc_fw);
938 	adev->gfx.rlc_fw = NULL;
939 	release_firmware(adev->gfx.mec_fw);
940 	adev->gfx.mec_fw = NULL;
941 	if ((adev->asic_type != CHIP_STONEY) &&
942 	    (adev->asic_type != CHIP_TOPAZ))
943 		release_firmware(adev->gfx.mec2_fw);
944 	adev->gfx.mec2_fw = NULL;
945 
946 	kfree(adev->gfx.rlc.register_list_format);
947 }
948 
949 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
950 {
951 	const char *chip_name;
952 	char fw_name[30];
953 	int err;
954 	struct amdgpu_firmware_info *info = NULL;
955 	const struct common_firmware_header *header = NULL;
956 	const struct gfx_firmware_header_v1_0 *cp_hdr;
957 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
958 	unsigned int *tmp = NULL, i;
959 
960 	DRM_DEBUG("\n");
961 
962 	switch (adev->asic_type) {
963 	case CHIP_TOPAZ:
964 		chip_name = "topaz";
965 		break;
966 	case CHIP_TONGA:
967 		chip_name = "tonga";
968 		break;
969 	case CHIP_CARRIZO:
970 		chip_name = "carrizo";
971 		break;
972 	case CHIP_FIJI:
973 		chip_name = "fiji";
974 		break;
975 	case CHIP_STONEY:
976 		chip_name = "stoney";
977 		break;
978 	case CHIP_POLARIS10:
979 		chip_name = "polaris10";
980 		break;
981 	case CHIP_POLARIS11:
982 		chip_name = "polaris11";
983 		break;
984 	case CHIP_POLARIS12:
985 		chip_name = "polaris12";
986 		break;
987 	case CHIP_VEGAM:
988 		chip_name = "vegam";
989 		break;
990 	default:
991 		BUG();
992 	}
993 
994 	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
995 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
996 		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
997 		if (err == -ENOENT) {
998 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
999 			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000 		}
1001 	} else {
1002 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1003 		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1004 	}
1005 	if (err)
1006 		goto out;
1007 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1008 	if (err)
1009 		goto out;
1010 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1011 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1012 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1013 
1014 	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1015 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1016 		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1017 		if (err == -ENOENT) {
1018 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019 			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020 		}
1021 	} else {
1022 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1023 		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1024 	}
1025 	if (err)
1026 		goto out;
1027 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1028 	if (err)
1029 		goto out;
1030 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1031 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1032 
1033 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1034 
1035 	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1036 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1037 		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1038 		if (err == -ENOENT) {
1039 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040 			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041 		}
1042 	} else {
1043 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1044 		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1045 	}
1046 	if (err)
1047 		goto out;
1048 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1049 	if (err)
1050 		goto out;
1051 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1052 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1053 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1054 
1055 	/*
1056 	 * Support for MCBP/Virtualization in combination with chained IBs is
1057 	 * formal released on feature version #46
1058 	 */
1059 	if (adev->gfx.ce_feature_version >= 46 &&
1060 	    adev->gfx.pfp_feature_version >= 46) {
1061 		adev->virt.chained_ib_support = true;
1062 		DRM_INFO("Chained IB support enabled!\n");
1063 	} else
1064 		adev->virt.chained_ib_support = false;
1065 
1066 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1067 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1068 	if (err)
1069 		goto out;
1070 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1071 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1072 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1073 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1074 
1075 	adev->gfx.rlc.save_and_restore_offset =
1076 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1077 	adev->gfx.rlc.clear_state_descriptor_offset =
1078 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1079 	adev->gfx.rlc.avail_scratch_ram_locations =
1080 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1081 	adev->gfx.rlc.reg_restore_list_size =
1082 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1083 	adev->gfx.rlc.reg_list_format_start =
1084 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1085 	adev->gfx.rlc.reg_list_format_separate_start =
1086 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1087 	adev->gfx.rlc.starting_offsets_start =
1088 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1089 	adev->gfx.rlc.reg_list_format_size_bytes =
1090 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1091 	adev->gfx.rlc.reg_list_size_bytes =
1092 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1093 
1094 	adev->gfx.rlc.register_list_format =
1095 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1096 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1097 
1098 	if (!adev->gfx.rlc.register_list_format) {
1099 		err = -ENOMEM;
1100 		goto out;
1101 	}
1102 
1103 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1104 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1105 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1106 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1107 
1108 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1109 
1110 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1111 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1112 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1113 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1114 
1115 	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1116 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1117 		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1118 		if (err == -ENOENT) {
1119 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120 			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121 		}
1122 	} else {
1123 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1124 		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1125 	}
1126 	if (err)
1127 		goto out;
1128 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1129 	if (err)
1130 		goto out;
1131 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1132 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1133 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1134 
1135 	if ((adev->asic_type != CHIP_STONEY) &&
1136 	    (adev->asic_type != CHIP_TOPAZ)) {
1137 		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1138 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1139 			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1140 			if (err == -ENOENT) {
1141 				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142 				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143 			}
1144 		} else {
1145 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1146 			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1147 		}
1148 		if (!err) {
1149 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1150 			if (err)
1151 				goto out;
1152 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1153 				adev->gfx.mec2_fw->data;
1154 			adev->gfx.mec2_fw_version =
1155 				le32_to_cpu(cp_hdr->header.ucode_version);
1156 			adev->gfx.mec2_feature_version =
1157 				le32_to_cpu(cp_hdr->ucode_feature_version);
1158 		} else {
1159 			err = 0;
1160 			adev->gfx.mec2_fw = NULL;
1161 		}
1162 	}
1163 
1164 	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1165 	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1166 	info->fw = adev->gfx.pfp_fw;
1167 	header = (const struct common_firmware_header *)info->fw->data;
1168 	adev->firmware.fw_size +=
1169 		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1170 
1171 	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1172 	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1173 	info->fw = adev->gfx.me_fw;
1174 	header = (const struct common_firmware_header *)info->fw->data;
1175 	adev->firmware.fw_size +=
1176 		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1177 
1178 	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1179 	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1180 	info->fw = adev->gfx.ce_fw;
1181 	header = (const struct common_firmware_header *)info->fw->data;
1182 	adev->firmware.fw_size +=
1183 		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184 
1185 	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1186 	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1187 	info->fw = adev->gfx.rlc_fw;
1188 	header = (const struct common_firmware_header *)info->fw->data;
1189 	adev->firmware.fw_size +=
1190 		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191 
1192 	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1193 	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1194 	info->fw = adev->gfx.mec_fw;
1195 	header = (const struct common_firmware_header *)info->fw->data;
1196 	adev->firmware.fw_size +=
1197 		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1198 
1199 	/* we need account JT in */
1200 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1201 	adev->firmware.fw_size +=
1202 		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1203 
1204 	if (amdgpu_sriov_vf(adev)) {
1205 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1206 		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1207 		info->fw = adev->gfx.mec_fw;
1208 		adev->firmware.fw_size +=
1209 			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1210 	}
1211 
1212 	if (adev->gfx.mec2_fw) {
1213 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1214 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1215 		info->fw = adev->gfx.mec2_fw;
1216 		header = (const struct common_firmware_header *)info->fw->data;
1217 		adev->firmware.fw_size +=
1218 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1219 	}
1220 
1221 out:
1222 	if (err) {
1223 		dev_err(adev->dev,
1224 			"gfx8: Failed to load firmware \"%s\"\n",
1225 			fw_name);
1226 		release_firmware(adev->gfx.pfp_fw);
1227 		adev->gfx.pfp_fw = NULL;
1228 		release_firmware(adev->gfx.me_fw);
1229 		adev->gfx.me_fw = NULL;
1230 		release_firmware(adev->gfx.ce_fw);
1231 		adev->gfx.ce_fw = NULL;
1232 		release_firmware(adev->gfx.rlc_fw);
1233 		adev->gfx.rlc_fw = NULL;
1234 		release_firmware(adev->gfx.mec_fw);
1235 		adev->gfx.mec_fw = NULL;
1236 		release_firmware(adev->gfx.mec2_fw);
1237 		adev->gfx.mec2_fw = NULL;
1238 	}
1239 	return err;
1240 }
1241 
1242 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1243 				    volatile u32 *buffer)
1244 {
1245 	u32 count = 0, i;
1246 	const struct cs_section_def *sect = NULL;
1247 	const struct cs_extent_def *ext = NULL;
1248 
1249 	if (adev->gfx.rlc.cs_data == NULL)
1250 		return;
1251 	if (buffer == NULL)
1252 		return;
1253 
1254 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1255 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1256 
1257 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1258 	buffer[count++] = cpu_to_le32(0x80000000);
1259 	buffer[count++] = cpu_to_le32(0x80000000);
1260 
1261 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1262 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1263 			if (sect->id == SECT_CONTEXT) {
1264 				buffer[count++] =
1265 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1266 				buffer[count++] = cpu_to_le32(ext->reg_index -
1267 						PACKET3_SET_CONTEXT_REG_START);
1268 				for (i = 0; i < ext->reg_count; i++)
1269 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1270 			} else {
1271 				return;
1272 			}
1273 		}
1274 	}
1275 
1276 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1277 	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1278 			PACKET3_SET_CONTEXT_REG_START);
1279 	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1280 	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1281 
1282 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1283 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1284 
1285 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1286 	buffer[count++] = cpu_to_le32(0);
1287 }
1288 
1289 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1290 {
1291 	if (adev->asic_type == CHIP_CARRIZO)
1292 		return 5;
1293 	else
1294 		return 4;
1295 }
1296 
1297 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1298 {
1299 	const struct cs_section_def *cs_data;
1300 	int r;
1301 
1302 	adev->gfx.rlc.cs_data = vi_cs_data;
1303 
1304 	cs_data = adev->gfx.rlc.cs_data;
1305 
1306 	if (cs_data) {
1307 		/* init clear state block */
1308 		r = amdgpu_gfx_rlc_init_csb(adev);
1309 		if (r)
1310 			return r;
1311 	}
1312 
1313 	if ((adev->asic_type == CHIP_CARRIZO) ||
1314 	    (adev->asic_type == CHIP_STONEY)) {
1315 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1316 		r = amdgpu_gfx_rlc_init_cpt(adev);
1317 		if (r)
1318 			return r;
1319 	}
1320 
1321 	return 0;
1322 }
1323 
1324 static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
1325 {
1326 	int r;
1327 
1328 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1329 	if (unlikely(r != 0))
1330 		return r;
1331 
1332 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1333 			AMDGPU_GEM_DOMAIN_VRAM);
1334 	if (!r)
1335 		adev->gfx.rlc.clear_state_gpu_addr =
1336 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1337 
1338 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1339 
1340 	return r;
1341 }
1342 
1343 static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
1344 {
1345 	int r;
1346 
1347 	if (!adev->gfx.rlc.clear_state_obj)
1348 		return;
1349 
1350 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1351 	if (likely(r == 0)) {
1352 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1353 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1354 	}
1355 }
1356 
1357 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1358 {
1359 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1360 }
1361 
1362 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1363 {
1364 	int r;
1365 	u32 *hpd;
1366 	size_t mec_hpd_size;
1367 
1368 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1369 
1370 	/* take ownership of the relevant compute queues */
1371 	amdgpu_gfx_compute_queue_acquire(adev);
1372 
1373 	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1374 
1375 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1376 				      AMDGPU_GEM_DOMAIN_VRAM,
1377 				      &adev->gfx.mec.hpd_eop_obj,
1378 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1379 				      (void **)&hpd);
1380 	if (r) {
1381 		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1382 		return r;
1383 	}
1384 
1385 	memset(hpd, 0, mec_hpd_size);
1386 
1387 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1388 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1389 
1390 	return 0;
1391 }
1392 
1393 static const u32 vgpr_init_compute_shader[] =
1394 {
1395 	0x7e000209, 0x7e020208,
1396 	0x7e040207, 0x7e060206,
1397 	0x7e080205, 0x7e0a0204,
1398 	0x7e0c0203, 0x7e0e0202,
1399 	0x7e100201, 0x7e120200,
1400 	0x7e140209, 0x7e160208,
1401 	0x7e180207, 0x7e1a0206,
1402 	0x7e1c0205, 0x7e1e0204,
1403 	0x7e200203, 0x7e220202,
1404 	0x7e240201, 0x7e260200,
1405 	0x7e280209, 0x7e2a0208,
1406 	0x7e2c0207, 0x7e2e0206,
1407 	0x7e300205, 0x7e320204,
1408 	0x7e340203, 0x7e360202,
1409 	0x7e380201, 0x7e3a0200,
1410 	0x7e3c0209, 0x7e3e0208,
1411 	0x7e400207, 0x7e420206,
1412 	0x7e440205, 0x7e460204,
1413 	0x7e480203, 0x7e4a0202,
1414 	0x7e4c0201, 0x7e4e0200,
1415 	0x7e500209, 0x7e520208,
1416 	0x7e540207, 0x7e560206,
1417 	0x7e580205, 0x7e5a0204,
1418 	0x7e5c0203, 0x7e5e0202,
1419 	0x7e600201, 0x7e620200,
1420 	0x7e640209, 0x7e660208,
1421 	0x7e680207, 0x7e6a0206,
1422 	0x7e6c0205, 0x7e6e0204,
1423 	0x7e700203, 0x7e720202,
1424 	0x7e740201, 0x7e760200,
1425 	0x7e780209, 0x7e7a0208,
1426 	0x7e7c0207, 0x7e7e0206,
1427 	0xbf8a0000, 0xbf810000,
1428 };
1429 
1430 static const u32 sgpr_init_compute_shader[] =
1431 {
1432 	0xbe8a0100, 0xbe8c0102,
1433 	0xbe8e0104, 0xbe900106,
1434 	0xbe920108, 0xbe940100,
1435 	0xbe960102, 0xbe980104,
1436 	0xbe9a0106, 0xbe9c0108,
1437 	0xbe9e0100, 0xbea00102,
1438 	0xbea20104, 0xbea40106,
1439 	0xbea60108, 0xbea80100,
1440 	0xbeaa0102, 0xbeac0104,
1441 	0xbeae0106, 0xbeb00108,
1442 	0xbeb20100, 0xbeb40102,
1443 	0xbeb60104, 0xbeb80106,
1444 	0xbeba0108, 0xbebc0100,
1445 	0xbebe0102, 0xbec00104,
1446 	0xbec20106, 0xbec40108,
1447 	0xbec60100, 0xbec80102,
1448 	0xbee60004, 0xbee70005,
1449 	0xbeea0006, 0xbeeb0007,
1450 	0xbee80008, 0xbee90009,
1451 	0xbefc0000, 0xbf8a0000,
1452 	0xbf810000, 0x00000000,
1453 };
1454 
1455 static const u32 vgpr_init_regs[] =
1456 {
1457 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1458 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1459 	mmCOMPUTE_NUM_THREAD_X, 256*4,
1460 	mmCOMPUTE_NUM_THREAD_Y, 1,
1461 	mmCOMPUTE_NUM_THREAD_Z, 1,
1462 	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1463 	mmCOMPUTE_PGM_RSRC2, 20,
1464 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1465 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1466 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1467 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1468 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1469 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1470 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1471 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1472 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1473 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1474 };
1475 
1476 static const u32 sgpr1_init_regs[] =
1477 {
1478 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1479 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1480 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1481 	mmCOMPUTE_NUM_THREAD_Y, 1,
1482 	mmCOMPUTE_NUM_THREAD_Z, 1,
1483 	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1484 	mmCOMPUTE_PGM_RSRC2, 20,
1485 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1486 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1487 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1488 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1489 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1490 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1491 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1492 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1493 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1494 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1495 };
1496 
1497 static const u32 sgpr2_init_regs[] =
1498 {
1499 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1500 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1501 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1502 	mmCOMPUTE_NUM_THREAD_Y, 1,
1503 	mmCOMPUTE_NUM_THREAD_Z, 1,
1504 	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1505 	mmCOMPUTE_PGM_RSRC2, 20,
1506 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1507 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1508 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1509 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1510 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1511 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1512 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1513 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1514 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1515 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1516 };
1517 
1518 static const u32 sec_ded_counter_registers[] =
1519 {
1520 	mmCPC_EDC_ATC_CNT,
1521 	mmCPC_EDC_SCRATCH_CNT,
1522 	mmCPC_EDC_UCODE_CNT,
1523 	mmCPF_EDC_ATC_CNT,
1524 	mmCPF_EDC_ROQ_CNT,
1525 	mmCPF_EDC_TAG_CNT,
1526 	mmCPG_EDC_ATC_CNT,
1527 	mmCPG_EDC_DMA_CNT,
1528 	mmCPG_EDC_TAG_CNT,
1529 	mmDC_EDC_CSINVOC_CNT,
1530 	mmDC_EDC_RESTORE_CNT,
1531 	mmDC_EDC_STATE_CNT,
1532 	mmGDS_EDC_CNT,
1533 	mmGDS_EDC_GRBM_CNT,
1534 	mmGDS_EDC_OA_DED,
1535 	mmSPI_EDC_CNT,
1536 	mmSQC_ATC_EDC_GATCL1_CNT,
1537 	mmSQC_EDC_CNT,
1538 	mmSQ_EDC_DED_CNT,
1539 	mmSQ_EDC_INFO,
1540 	mmSQ_EDC_SEC_CNT,
1541 	mmTCC_EDC_CNT,
1542 	mmTCP_ATC_EDC_GATCL1_CNT,
1543 	mmTCP_EDC_CNT,
1544 	mmTD_EDC_CNT
1545 };
1546 
1547 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1548 {
1549 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1550 	struct amdgpu_ib ib;
1551 	struct dma_fence *f = NULL;
1552 	int r, i;
1553 	u32 tmp;
1554 	unsigned total_size, vgpr_offset, sgpr_offset;
1555 	u64 gpu_addr;
1556 
1557 	/* only supported on CZ */
1558 	if (adev->asic_type != CHIP_CARRIZO)
1559 		return 0;
1560 
1561 	/* bail if the compute ring is not ready */
1562 	if (!ring->sched.ready)
1563 		return 0;
1564 
1565 	tmp = RREG32(mmGB_EDC_MODE);
1566 	WREG32(mmGB_EDC_MODE, 0);
1567 
1568 	total_size =
1569 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1570 	total_size +=
1571 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1572 	total_size +=
1573 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1574 	total_size = ALIGN(total_size, 256);
1575 	vgpr_offset = total_size;
1576 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1577 	sgpr_offset = total_size;
1578 	total_size += sizeof(sgpr_init_compute_shader);
1579 
1580 	/* allocate an indirect buffer to put the commands in */
1581 	memset(&ib, 0, sizeof(ib));
1582 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1583 	if (r) {
1584 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1585 		return r;
1586 	}
1587 
1588 	/* load the compute shaders */
1589 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1590 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1591 
1592 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1593 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1594 
1595 	/* init the ib length to 0 */
1596 	ib.length_dw = 0;
1597 
1598 	/* VGPR */
1599 	/* write the register state for the compute dispatch */
1600 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1601 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1602 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1603 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1604 	}
1605 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1606 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1607 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1608 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1609 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1610 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1611 
1612 	/* write dispatch packet */
1613 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1614 	ib.ptr[ib.length_dw++] = 8; /* x */
1615 	ib.ptr[ib.length_dw++] = 1; /* y */
1616 	ib.ptr[ib.length_dw++] = 1; /* z */
1617 	ib.ptr[ib.length_dw++] =
1618 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1619 
1620 	/* write CS partial flush packet */
1621 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1622 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1623 
1624 	/* SGPR1 */
1625 	/* write the register state for the compute dispatch */
1626 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1627 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1628 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1629 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1630 	}
1631 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1632 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1633 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1634 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1635 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1636 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1637 
1638 	/* write dispatch packet */
1639 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1640 	ib.ptr[ib.length_dw++] = 8; /* x */
1641 	ib.ptr[ib.length_dw++] = 1; /* y */
1642 	ib.ptr[ib.length_dw++] = 1; /* z */
1643 	ib.ptr[ib.length_dw++] =
1644 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1645 
1646 	/* write CS partial flush packet */
1647 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1648 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1649 
1650 	/* SGPR2 */
1651 	/* write the register state for the compute dispatch */
1652 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1653 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1654 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1655 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1656 	}
1657 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1658 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1659 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1660 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1661 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1662 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1663 
1664 	/* write dispatch packet */
1665 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1666 	ib.ptr[ib.length_dw++] = 8; /* x */
1667 	ib.ptr[ib.length_dw++] = 1; /* y */
1668 	ib.ptr[ib.length_dw++] = 1; /* z */
1669 	ib.ptr[ib.length_dw++] =
1670 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1671 
1672 	/* write CS partial flush packet */
1673 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1674 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1675 
1676 	/* schedule the ib on the ring */
1677 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1678 	if (r) {
1679 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1680 		goto fail;
1681 	}
1682 
1683 	/* wait for the GPU to finish processing the IB */
1684 	r = dma_fence_wait(f, false);
1685 	if (r) {
1686 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1687 		goto fail;
1688 	}
1689 
1690 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1691 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1692 	WREG32(mmGB_EDC_MODE, tmp);
1693 
1694 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1695 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1696 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1697 
1698 
1699 	/* read back registers to clear the counters */
1700 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1701 		RREG32(sec_ded_counter_registers[i]);
1702 
1703 fail:
1704 	amdgpu_ib_free(adev, &ib, NULL);
1705 	dma_fence_put(f);
1706 
1707 	return r;
1708 }
1709 
1710 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1711 {
1712 	u32 gb_addr_config;
1713 	u32 mc_shared_chmap, mc_arb_ramcfg;
1714 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1715 	u32 tmp;
1716 	int ret;
1717 
1718 	switch (adev->asic_type) {
1719 	case CHIP_TOPAZ:
1720 		adev->gfx.config.max_shader_engines = 1;
1721 		adev->gfx.config.max_tile_pipes = 2;
1722 		adev->gfx.config.max_cu_per_sh = 6;
1723 		adev->gfx.config.max_sh_per_se = 1;
1724 		adev->gfx.config.max_backends_per_se = 2;
1725 		adev->gfx.config.max_texture_channel_caches = 2;
1726 		adev->gfx.config.max_gprs = 256;
1727 		adev->gfx.config.max_gs_threads = 32;
1728 		adev->gfx.config.max_hw_contexts = 8;
1729 
1730 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1731 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1732 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1733 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1734 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1735 		break;
1736 	case CHIP_FIJI:
1737 		adev->gfx.config.max_shader_engines = 4;
1738 		adev->gfx.config.max_tile_pipes = 16;
1739 		adev->gfx.config.max_cu_per_sh = 16;
1740 		adev->gfx.config.max_sh_per_se = 1;
1741 		adev->gfx.config.max_backends_per_se = 4;
1742 		adev->gfx.config.max_texture_channel_caches = 16;
1743 		adev->gfx.config.max_gprs = 256;
1744 		adev->gfx.config.max_gs_threads = 32;
1745 		adev->gfx.config.max_hw_contexts = 8;
1746 
1747 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1748 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1749 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1750 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1751 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1752 		break;
1753 	case CHIP_POLARIS11:
1754 	case CHIP_POLARIS12:
1755 		ret = amdgpu_atombios_get_gfx_info(adev);
1756 		if (ret)
1757 			return ret;
1758 		adev->gfx.config.max_gprs = 256;
1759 		adev->gfx.config.max_gs_threads = 32;
1760 		adev->gfx.config.max_hw_contexts = 8;
1761 
1762 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1763 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1764 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1765 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1766 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1767 		break;
1768 	case CHIP_POLARIS10:
1769 	case CHIP_VEGAM:
1770 		ret = amdgpu_atombios_get_gfx_info(adev);
1771 		if (ret)
1772 			return ret;
1773 		adev->gfx.config.max_gprs = 256;
1774 		adev->gfx.config.max_gs_threads = 32;
1775 		adev->gfx.config.max_hw_contexts = 8;
1776 
1777 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1782 		break;
1783 	case CHIP_TONGA:
1784 		adev->gfx.config.max_shader_engines = 4;
1785 		adev->gfx.config.max_tile_pipes = 8;
1786 		adev->gfx.config.max_cu_per_sh = 8;
1787 		adev->gfx.config.max_sh_per_se = 1;
1788 		adev->gfx.config.max_backends_per_se = 2;
1789 		adev->gfx.config.max_texture_channel_caches = 8;
1790 		adev->gfx.config.max_gprs = 256;
1791 		adev->gfx.config.max_gs_threads = 32;
1792 		adev->gfx.config.max_hw_contexts = 8;
1793 
1794 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1799 		break;
1800 	case CHIP_CARRIZO:
1801 		adev->gfx.config.max_shader_engines = 1;
1802 		adev->gfx.config.max_tile_pipes = 2;
1803 		adev->gfx.config.max_sh_per_se = 1;
1804 		adev->gfx.config.max_backends_per_se = 2;
1805 		adev->gfx.config.max_cu_per_sh = 8;
1806 		adev->gfx.config.max_texture_channel_caches = 2;
1807 		adev->gfx.config.max_gprs = 256;
1808 		adev->gfx.config.max_gs_threads = 32;
1809 		adev->gfx.config.max_hw_contexts = 8;
1810 
1811 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1816 		break;
1817 	case CHIP_STONEY:
1818 		adev->gfx.config.max_shader_engines = 1;
1819 		adev->gfx.config.max_tile_pipes = 2;
1820 		adev->gfx.config.max_sh_per_se = 1;
1821 		adev->gfx.config.max_backends_per_se = 1;
1822 		adev->gfx.config.max_cu_per_sh = 3;
1823 		adev->gfx.config.max_texture_channel_caches = 2;
1824 		adev->gfx.config.max_gprs = 256;
1825 		adev->gfx.config.max_gs_threads = 16;
1826 		adev->gfx.config.max_hw_contexts = 8;
1827 
1828 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1833 		break;
1834 	default:
1835 		adev->gfx.config.max_shader_engines = 2;
1836 		adev->gfx.config.max_tile_pipes = 4;
1837 		adev->gfx.config.max_cu_per_sh = 2;
1838 		adev->gfx.config.max_sh_per_se = 1;
1839 		adev->gfx.config.max_backends_per_se = 2;
1840 		adev->gfx.config.max_texture_channel_caches = 4;
1841 		adev->gfx.config.max_gprs = 256;
1842 		adev->gfx.config.max_gs_threads = 32;
1843 		adev->gfx.config.max_hw_contexts = 8;
1844 
1845 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1850 		break;
1851 	}
1852 
1853 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1854 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1855 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1856 
1857 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1858 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1859 	if (adev->flags & AMD_IS_APU) {
1860 		/* Get memory bank mapping mode. */
1861 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1862 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1863 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1864 
1865 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1866 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1867 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1868 
1869 		/* Validate settings in case only one DIMM installed. */
1870 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1871 			dimm00_addr_map = 0;
1872 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1873 			dimm01_addr_map = 0;
1874 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1875 			dimm10_addr_map = 0;
1876 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1877 			dimm11_addr_map = 0;
1878 
1879 		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1880 		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1881 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1882 			adev->gfx.config.mem_row_size_in_kb = 2;
1883 		else
1884 			adev->gfx.config.mem_row_size_in_kb = 1;
1885 	} else {
1886 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1887 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1888 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1889 			adev->gfx.config.mem_row_size_in_kb = 4;
1890 	}
1891 
1892 	adev->gfx.config.shader_engine_tile_size = 32;
1893 	adev->gfx.config.num_gpus = 1;
1894 	adev->gfx.config.multi_gpu_tile_size = 64;
1895 
1896 	/* fix up row size */
1897 	switch (adev->gfx.config.mem_row_size_in_kb) {
1898 	case 1:
1899 	default:
1900 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1901 		break;
1902 	case 2:
1903 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1904 		break;
1905 	case 4:
1906 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1907 		break;
1908 	}
1909 	adev->gfx.config.gb_addr_config = gb_addr_config;
1910 
1911 	return 0;
1912 }
1913 
1914 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1915 					int mec, int pipe, int queue)
1916 {
1917 	int r;
1918 	unsigned irq_type;
1919 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1920 
1921 	ring = &adev->gfx.compute_ring[ring_id];
1922 
1923 	/* mec0 is me1 */
1924 	ring->me = mec + 1;
1925 	ring->pipe = pipe;
1926 	ring->queue = queue;
1927 
1928 	ring->ring_obj = NULL;
1929 	ring->use_doorbell = true;
1930 	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1931 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1932 				+ (ring_id * GFX8_MEC_HPD_SIZE);
1933 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1934 
1935 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1936 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1937 		+ ring->pipe;
1938 
1939 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1940 	r = amdgpu_ring_init(adev, ring, 1024,
1941 			&adev->gfx.eop_irq, irq_type);
1942 	if (r)
1943 		return r;
1944 
1945 
1946 	return 0;
1947 }
1948 
/*
 * Forward declaration of the SQ interrupt work handler.  gfx_v8_0_sw_init()
 * below passes it to INIT_WORK(); the definition is not in this part of the
 * file.
 */
1949 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1950 
1951 static int gfx_v8_0_sw_init(void *handle)
1952 {
1953 	int i, j, k, r, ring_id;
1954 	struct amdgpu_ring *ring;
1955 	struct amdgpu_kiq *kiq;
1956 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1957 
1958 	switch (adev->asic_type) {
1959 	case CHIP_TONGA:
1960 	case CHIP_CARRIZO:
1961 	case CHIP_FIJI:
1962 	case CHIP_POLARIS10:
1963 	case CHIP_POLARIS11:
1964 	case CHIP_POLARIS12:
1965 	case CHIP_VEGAM:
1966 		adev->gfx.mec.num_mec = 2;
1967 		break;
1968 	case CHIP_TOPAZ:
1969 	case CHIP_STONEY:
1970 	default:
1971 		adev->gfx.mec.num_mec = 1;
1972 		break;
1973 	}
1974 
1975 	adev->gfx.mec.num_pipe_per_mec = 4;
1976 	adev->gfx.mec.num_queue_per_pipe = 8;
1977 
1978 	/* EOP Event */
1979 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1980 	if (r)
1981 		return r;
1982 
1983 	/* Privileged reg */
1984 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1985 			      &adev->gfx.priv_reg_irq);
1986 	if (r)
1987 		return r;
1988 
1989 	/* Privileged inst */
1990 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1991 			      &adev->gfx.priv_inst_irq);
1992 	if (r)
1993 		return r;
1994 
1995 	/* Add CP EDC/ECC irq  */
1996 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1997 			      &adev->gfx.cp_ecc_error_irq);
1998 	if (r)
1999 		return r;
2000 
2001 	/* SQ interrupts. */
2002 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
2003 			      &adev->gfx.sq_irq);
2004 	if (r) {
2005 		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
2006 		return r;
2007 	}
2008 
2009 	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2010 
2011 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2012 
2013 	gfx_v8_0_scratch_init(adev);
2014 
2015 	r = gfx_v8_0_init_microcode(adev);
2016 	if (r) {
2017 		DRM_ERROR("Failed to load gfx firmware!\n");
2018 		return r;
2019 	}
2020 
2021 	r = adev->gfx.rlc.funcs->init(adev);
2022 	if (r) {
2023 		DRM_ERROR("Failed to init rlc BOs!\n");
2024 		return r;
2025 	}
2026 
2027 	r = gfx_v8_0_mec_init(adev);
2028 	if (r) {
2029 		DRM_ERROR("Failed to init MEC BOs!\n");
2030 		return r;
2031 	}
2032 
2033 	/* set up the gfx ring */
2034 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2035 		ring = &adev->gfx.gfx_ring[i];
2036 		ring->ring_obj = NULL;
2037 		sprintf(ring->name, "gfx");
2038 		/* no gfx doorbells on iceland */
2039 		if (adev->asic_type != CHIP_TOPAZ) {
2040 			ring->use_doorbell = true;
2041 			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2042 		}
2043 
2044 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2045 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2046 		if (r)
2047 			return r;
2048 	}
2049 
2050 
2051 	/* set up the compute queues - allocate horizontally across pipes */
2052 	ring_id = 0;
2053 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2054 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2055 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2056 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2057 					continue;
2058 
2059 				r = gfx_v8_0_compute_ring_init(adev,
2060 								ring_id,
2061 								i, k, j);
2062 				if (r)
2063 					return r;
2064 
2065 				ring_id++;
2066 			}
2067 		}
2068 	}
2069 
2070 	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2071 	if (r) {
2072 		DRM_ERROR("Failed to init KIQ BOs!\n");
2073 		return r;
2074 	}
2075 
2076 	kiq = &adev->gfx.kiq;
2077 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2078 	if (r)
2079 		return r;
2080 
2081 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2082 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2083 	if (r)
2084 		return r;
2085 
2086 	adev->gfx.ce_ram_size = 0x8000;
2087 
2088 	r = gfx_v8_0_gpu_early_init(adev);
2089 	if (r)
2090 		return r;
2091 
2092 	return 0;
2093 }
2094 
2095 static int gfx_v8_0_sw_fini(void *handle)
2096 {
2097 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2098 	int i;
2099 
2100 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2101 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2102 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2103 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2104 
2105 	amdgpu_gfx_mqd_sw_fini(adev);
2106 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2107 	amdgpu_gfx_kiq_fini(adev);
2108 
2109 	gfx_v8_0_mec_fini(adev);
2110 	amdgpu_gfx_rlc_fini(adev);
2111 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2112 				&adev->gfx.rlc.clear_state_gpu_addr,
2113 				(void **)&adev->gfx.rlc.cs_ptr);
2114 	if ((adev->asic_type == CHIP_CARRIZO) ||
2115 	    (adev->asic_type == CHIP_STONEY)) {
2116 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2117 				&adev->gfx.rlc.cp_table_gpu_addr,
2118 				(void **)&adev->gfx.rlc.cp_table_ptr);
2119 	}
2120 	gfx_v8_0_free_microcode(adev);
2121 
2122 	return 0;
2123 }
2124 
2125 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2126 {
2127 	uint32_t *modearray, *mod2array;
2128 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2129 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2130 	u32 reg_offset;
2131 
2132 	modearray = adev->gfx.config.tile_mode_array;
2133 	mod2array = adev->gfx.config.macrotile_mode_array;
2134 
2135 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2136 		modearray[reg_offset] = 0;
2137 
2138 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2139 		mod2array[reg_offset] = 0;
2140 
2141 	switch (adev->asic_type) {
2142 	case CHIP_TOPAZ:
2143 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2144 				PIPE_CONFIG(ADDR_SURF_P2) |
2145 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2146 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2147 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148 				PIPE_CONFIG(ADDR_SURF_P2) |
2149 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2150 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152 				PIPE_CONFIG(ADDR_SURF_P2) |
2153 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2154 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2155 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156 				PIPE_CONFIG(ADDR_SURF_P2) |
2157 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2158 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160 				PIPE_CONFIG(ADDR_SURF_P2) |
2161 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2162 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2164 				PIPE_CONFIG(ADDR_SURF_P2) |
2165 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2166 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2167 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2168 				PIPE_CONFIG(ADDR_SURF_P2) |
2169 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2170 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2171 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2172 				PIPE_CONFIG(ADDR_SURF_P2));
2173 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2174 				PIPE_CONFIG(ADDR_SURF_P2) |
2175 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2176 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2177 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2178 				 PIPE_CONFIG(ADDR_SURF_P2) |
2179 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2180 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2182 				 PIPE_CONFIG(ADDR_SURF_P2) |
2183 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2184 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2185 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2186 				 PIPE_CONFIG(ADDR_SURF_P2) |
2187 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190 				 PIPE_CONFIG(ADDR_SURF_P2) |
2191 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2194 				 PIPE_CONFIG(ADDR_SURF_P2) |
2195 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2196 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2198 				 PIPE_CONFIG(ADDR_SURF_P2) |
2199 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2201 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2202 				 PIPE_CONFIG(ADDR_SURF_P2) |
2203 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2205 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2206 				 PIPE_CONFIG(ADDR_SURF_P2) |
2207 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2208 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2210 				 PIPE_CONFIG(ADDR_SURF_P2) |
2211 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2212 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2213 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2214 				 PIPE_CONFIG(ADDR_SURF_P2) |
2215 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2216 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2218 				 PIPE_CONFIG(ADDR_SURF_P2) |
2219 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2222 				 PIPE_CONFIG(ADDR_SURF_P2) |
2223 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2224 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2226 				 PIPE_CONFIG(ADDR_SURF_P2) |
2227 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2228 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2229 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2230 				 PIPE_CONFIG(ADDR_SURF_P2) |
2231 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2232 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2233 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2234 				 PIPE_CONFIG(ADDR_SURF_P2) |
2235 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2236 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2237 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2238 				 PIPE_CONFIG(ADDR_SURF_P2) |
2239 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2240 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2241 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2242 				 PIPE_CONFIG(ADDR_SURF_P2) |
2243 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2244 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2245 
2246 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2247 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2248 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249 				NUM_BANKS(ADDR_SURF_8_BANK));
2250 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2251 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2252 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2253 				NUM_BANKS(ADDR_SURF_8_BANK));
2254 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2255 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257 				NUM_BANKS(ADDR_SURF_8_BANK));
2258 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261 				NUM_BANKS(ADDR_SURF_8_BANK));
2262 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2264 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265 				NUM_BANKS(ADDR_SURF_8_BANK));
2266 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269 				NUM_BANKS(ADDR_SURF_8_BANK));
2270 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273 				NUM_BANKS(ADDR_SURF_8_BANK));
2274 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2275 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2276 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2277 				NUM_BANKS(ADDR_SURF_16_BANK));
2278 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2279 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2280 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281 				NUM_BANKS(ADDR_SURF_16_BANK));
2282 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2283 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2284 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285 				 NUM_BANKS(ADDR_SURF_16_BANK));
2286 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2287 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2288 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289 				 NUM_BANKS(ADDR_SURF_16_BANK));
2290 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2291 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2292 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293 				 NUM_BANKS(ADDR_SURF_16_BANK));
2294 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2296 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297 				 NUM_BANKS(ADDR_SURF_16_BANK));
2298 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2300 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2301 				 NUM_BANKS(ADDR_SURF_8_BANK));
2302 
2303 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2304 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2305 			    reg_offset != 23)
2306 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2307 
2308 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2309 			if (reg_offset != 7)
2310 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2311 
2312 		break;
2313 	case CHIP_FIJI:
2314 	case CHIP_VEGAM:
2315 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2318 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2322 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2326 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2330 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2334 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2336 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2340 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2348 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2349 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2361 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2365 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2374 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2381 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2383 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2385 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2386 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2390 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2394 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2398 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2402 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2406 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2407 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2410 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2412 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2414 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2418 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2422 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2426 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2430 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2433 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2435 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2437 
2438 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441 				NUM_BANKS(ADDR_SURF_8_BANK));
2442 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445 				NUM_BANKS(ADDR_SURF_8_BANK));
2446 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449 				NUM_BANKS(ADDR_SURF_8_BANK));
2450 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453 				NUM_BANKS(ADDR_SURF_8_BANK));
2454 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2456 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457 				NUM_BANKS(ADDR_SURF_8_BANK));
2458 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461 				NUM_BANKS(ADDR_SURF_8_BANK));
2462 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465 				NUM_BANKS(ADDR_SURF_8_BANK));
2466 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2468 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2469 				NUM_BANKS(ADDR_SURF_8_BANK));
2470 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2472 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2473 				NUM_BANKS(ADDR_SURF_8_BANK));
2474 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2476 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477 				 NUM_BANKS(ADDR_SURF_8_BANK));
2478 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481 				 NUM_BANKS(ADDR_SURF_8_BANK));
2482 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2485 				 NUM_BANKS(ADDR_SURF_8_BANK));
2486 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489 				 NUM_BANKS(ADDR_SURF_8_BANK));
2490 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493 				 NUM_BANKS(ADDR_SURF_4_BANK));
2494 
2495 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2496 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2497 
2498 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2499 			if (reg_offset != 7)
2500 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2501 
2502 		break;
2503 	case CHIP_TONGA:
2504 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2507 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2508 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2511 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2515 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2519 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2523 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2525 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2527 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2529 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2531 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2537 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2538 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2539 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2541 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2542 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2547 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2549 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2550 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2551 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2552 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2554 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2558 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2559 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2562 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2563 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2567 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2570 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2572 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2574 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2575 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2578 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2579 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2581 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2583 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2587 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2591 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2595 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2596 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2599 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2601 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2603 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2605 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2607 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2609 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2611 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2619 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2622 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2623 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2624 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2626 
2627 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2630 				NUM_BANKS(ADDR_SURF_16_BANK));
2631 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634 				NUM_BANKS(ADDR_SURF_16_BANK));
2635 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638 				NUM_BANKS(ADDR_SURF_16_BANK));
2639 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642 				NUM_BANKS(ADDR_SURF_16_BANK));
2643 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646 				NUM_BANKS(ADDR_SURF_16_BANK));
2647 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650 				NUM_BANKS(ADDR_SURF_16_BANK));
2651 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654 				NUM_BANKS(ADDR_SURF_16_BANK));
2655 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2657 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2658 				NUM_BANKS(ADDR_SURF_16_BANK));
2659 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2662 				NUM_BANKS(ADDR_SURF_16_BANK));
2663 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2665 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2666 				 NUM_BANKS(ADDR_SURF_16_BANK));
2667 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2670 				 NUM_BANKS(ADDR_SURF_16_BANK));
2671 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2674 				 NUM_BANKS(ADDR_SURF_8_BANK));
2675 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2678 				 NUM_BANKS(ADDR_SURF_4_BANK));
2679 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2682 				 NUM_BANKS(ADDR_SURF_4_BANK));
2683 
2684 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2685 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2686 
2687 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2688 			if (reg_offset != 7)
2689 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2690 
2691 		break;
2692 	case CHIP_POLARIS11:
2693 	case CHIP_POLARIS12:
2694 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2697 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2698 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2701 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2702 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2706 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2709 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2710 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2713 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2714 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2715 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2717 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2718 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2719 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2721 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2722 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2725 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2726 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2727 				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2728 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2729 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2731 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2732 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2735 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2737 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2739 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2740 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2743 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2744 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2747 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2748 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2751 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2752 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2753 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2755 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2756 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2757 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2759 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2760 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2761 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2763 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2764 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2765 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2768 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2769 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2771 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2772 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2773 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2775 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2777 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2779 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2780 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2781 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2783 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2784 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2785 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2787 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2788 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2789 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2791 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2792 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2793 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2795 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2796 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2797 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2799 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2800 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2803 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2804 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2805 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2807 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2809 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2810 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2811 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2812 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2813 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2815 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2816 
2817 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820 				NUM_BANKS(ADDR_SURF_16_BANK));
2821 
2822 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2824 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2825 				NUM_BANKS(ADDR_SURF_16_BANK));
2826 
2827 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2829 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830 				NUM_BANKS(ADDR_SURF_16_BANK));
2831 
2832 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2834 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 				NUM_BANKS(ADDR_SURF_16_BANK));
2836 
2837 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840 				NUM_BANKS(ADDR_SURF_16_BANK));
2841 
2842 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2844 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2845 				NUM_BANKS(ADDR_SURF_16_BANK));
2846 
2847 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850 				NUM_BANKS(ADDR_SURF_16_BANK));
2851 
2852 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2853 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2854 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855 				NUM_BANKS(ADDR_SURF_16_BANK));
2856 
2857 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2858 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2859 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860 				NUM_BANKS(ADDR_SURF_16_BANK));
2861 
2862 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2865 				NUM_BANKS(ADDR_SURF_16_BANK));
2866 
2867 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2869 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2870 				NUM_BANKS(ADDR_SURF_16_BANK));
2871 
2872 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2875 				NUM_BANKS(ADDR_SURF_16_BANK));
2876 
2877 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2879 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2880 				NUM_BANKS(ADDR_SURF_8_BANK));
2881 
2882 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2885 				NUM_BANKS(ADDR_SURF_4_BANK));
2886 
2887 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2888 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2889 
2890 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2891 			if (reg_offset != 7)
2892 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2893 
2894 		break;
2895 	case CHIP_POLARIS10:
2896 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2899 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2903 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2907 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2911 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2915 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2929 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2930 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2931 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2932 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2933 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2935 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2942 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2944 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2947 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2948 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2955 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2959 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2962 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2964 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2967 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2968 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2970 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2973 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2975 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2979 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2983 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2988 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2991 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2992 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2993 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2995 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2997 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2999 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3005 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3006 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3007 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3011 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3014 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3016 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3018 
3019 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3021 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022 				NUM_BANKS(ADDR_SURF_16_BANK));
3023 
3024 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 				NUM_BANKS(ADDR_SURF_16_BANK));
3028 
3029 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032 				NUM_BANKS(ADDR_SURF_16_BANK));
3033 
3034 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037 				NUM_BANKS(ADDR_SURF_16_BANK));
3038 
3039 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042 				NUM_BANKS(ADDR_SURF_16_BANK));
3043 
3044 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3047 				NUM_BANKS(ADDR_SURF_16_BANK));
3048 
3049 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052 				NUM_BANKS(ADDR_SURF_16_BANK));
3053 
3054 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3056 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3057 				NUM_BANKS(ADDR_SURF_16_BANK));
3058 
3059 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3061 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3062 				NUM_BANKS(ADDR_SURF_16_BANK));
3063 
3064 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3066 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3067 				NUM_BANKS(ADDR_SURF_16_BANK));
3068 
3069 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3071 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072 				NUM_BANKS(ADDR_SURF_16_BANK));
3073 
3074 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3077 				NUM_BANKS(ADDR_SURF_8_BANK));
3078 
3079 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3082 				NUM_BANKS(ADDR_SURF_4_BANK));
3083 
3084 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087 				NUM_BANKS(ADDR_SURF_4_BANK));
3088 
3089 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3090 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3091 
3092 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3093 			if (reg_offset != 7)
3094 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3095 
3096 		break;
3097 	case CHIP_STONEY:
3098 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3099 				PIPE_CONFIG(ADDR_SURF_P2) |
3100 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3101 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3102 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103 				PIPE_CONFIG(ADDR_SURF_P2) |
3104 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3105 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107 				PIPE_CONFIG(ADDR_SURF_P2) |
3108 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3109 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111 				PIPE_CONFIG(ADDR_SURF_P2) |
3112 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3113 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115 				PIPE_CONFIG(ADDR_SURF_P2) |
3116 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3117 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3119 				PIPE_CONFIG(ADDR_SURF_P2) |
3120 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123 				PIPE_CONFIG(ADDR_SURF_P2) |
3124 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3127 				PIPE_CONFIG(ADDR_SURF_P2));
3128 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3129 				PIPE_CONFIG(ADDR_SURF_P2) |
3130 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3131 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3132 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3133 				 PIPE_CONFIG(ADDR_SURF_P2) |
3134 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3137 				 PIPE_CONFIG(ADDR_SURF_P2) |
3138 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3140 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3141 				 PIPE_CONFIG(ADDR_SURF_P2) |
3142 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3143 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3144 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3145 				 PIPE_CONFIG(ADDR_SURF_P2) |
3146 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3149 				 PIPE_CONFIG(ADDR_SURF_P2) |
3150 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3153 				 PIPE_CONFIG(ADDR_SURF_P2) |
3154 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3156 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3157 				 PIPE_CONFIG(ADDR_SURF_P2) |
3158 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3160 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161 				 PIPE_CONFIG(ADDR_SURF_P2) |
3162 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3163 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3165 				 PIPE_CONFIG(ADDR_SURF_P2) |
3166 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3169 				 PIPE_CONFIG(ADDR_SURF_P2) |
3170 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3173 				 PIPE_CONFIG(ADDR_SURF_P2) |
3174 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3177 				 PIPE_CONFIG(ADDR_SURF_P2) |
3178 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3179 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3181 				 PIPE_CONFIG(ADDR_SURF_P2) |
3182 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3183 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3185 				 PIPE_CONFIG(ADDR_SURF_P2) |
3186 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3189 				 PIPE_CONFIG(ADDR_SURF_P2) |
3190 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3191 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193 				 PIPE_CONFIG(ADDR_SURF_P2) |
3194 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3197 				 PIPE_CONFIG(ADDR_SURF_P2) |
3198 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3200 
3201 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3202 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3203 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3204 				NUM_BANKS(ADDR_SURF_8_BANK));
3205 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3207 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208 				NUM_BANKS(ADDR_SURF_8_BANK));
3209 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3211 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3212 				NUM_BANKS(ADDR_SURF_8_BANK));
3213 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216 				NUM_BANKS(ADDR_SURF_8_BANK));
3217 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220 				NUM_BANKS(ADDR_SURF_8_BANK));
3221 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224 				NUM_BANKS(ADDR_SURF_8_BANK));
3225 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228 				NUM_BANKS(ADDR_SURF_8_BANK));
3229 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3230 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3231 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3232 				NUM_BANKS(ADDR_SURF_16_BANK));
3233 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3235 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236 				NUM_BANKS(ADDR_SURF_16_BANK));
3237 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3238 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240 				 NUM_BANKS(ADDR_SURF_16_BANK));
3241 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3243 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244 				 NUM_BANKS(ADDR_SURF_16_BANK));
3245 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3246 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248 				 NUM_BANKS(ADDR_SURF_16_BANK));
3249 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3251 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252 				 NUM_BANKS(ADDR_SURF_16_BANK));
3253 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3256 				 NUM_BANKS(ADDR_SURF_8_BANK));
3257 
3258 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3259 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3260 			    reg_offset != 23)
3261 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3262 
3263 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3264 			if (reg_offset != 7)
3265 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3266 
3267 		break;
3268 	default:
3269 		dev_warn(adev->dev,
3270 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3271 			 adev->asic_type);
3272 		/* fall through */
3273 
3274 	case CHIP_CARRIZO:
3275 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3276 				PIPE_CONFIG(ADDR_SURF_P2) |
3277 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3278 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3279 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280 				PIPE_CONFIG(ADDR_SURF_P2) |
3281 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3282 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3283 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284 				PIPE_CONFIG(ADDR_SURF_P2) |
3285 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3286 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288 				PIPE_CONFIG(ADDR_SURF_P2) |
3289 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3290 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292 				PIPE_CONFIG(ADDR_SURF_P2) |
3293 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3294 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3296 				PIPE_CONFIG(ADDR_SURF_P2) |
3297 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3298 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3300 				PIPE_CONFIG(ADDR_SURF_P2) |
3301 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3304 				PIPE_CONFIG(ADDR_SURF_P2));
3305 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3306 				PIPE_CONFIG(ADDR_SURF_P2) |
3307 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3308 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3309 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3310 				 PIPE_CONFIG(ADDR_SURF_P2) |
3311 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3312 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3313 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3314 				 PIPE_CONFIG(ADDR_SURF_P2) |
3315 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3317 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3318 				 PIPE_CONFIG(ADDR_SURF_P2) |
3319 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3320 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3321 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3322 				 PIPE_CONFIG(ADDR_SURF_P2) |
3323 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3324 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3325 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3326 				 PIPE_CONFIG(ADDR_SURF_P2) |
3327 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3330 				 PIPE_CONFIG(ADDR_SURF_P2) |
3331 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3333 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3334 				 PIPE_CONFIG(ADDR_SURF_P2) |
3335 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3337 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3338 				 PIPE_CONFIG(ADDR_SURF_P2) |
3339 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3340 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3342 				 PIPE_CONFIG(ADDR_SURF_P2) |
3343 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3344 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3346 				 PIPE_CONFIG(ADDR_SURF_P2) |
3347 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3350 				 PIPE_CONFIG(ADDR_SURF_P2) |
3351 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3354 				 PIPE_CONFIG(ADDR_SURF_P2) |
3355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3358 				 PIPE_CONFIG(ADDR_SURF_P2) |
3359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3362 				 PIPE_CONFIG(ADDR_SURF_P2) |
3363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3366 				 PIPE_CONFIG(ADDR_SURF_P2) |
3367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3369 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3370 				 PIPE_CONFIG(ADDR_SURF_P2) |
3371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3373 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3374 				 PIPE_CONFIG(ADDR_SURF_P2) |
3375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3377 
3378 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3379 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3380 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3381 				NUM_BANKS(ADDR_SURF_8_BANK));
3382 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3384 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3385 				NUM_BANKS(ADDR_SURF_8_BANK));
3386 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3388 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3389 				NUM_BANKS(ADDR_SURF_8_BANK));
3390 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3392 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3393 				NUM_BANKS(ADDR_SURF_8_BANK));
3394 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397 				NUM_BANKS(ADDR_SURF_8_BANK));
3398 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401 				NUM_BANKS(ADDR_SURF_8_BANK));
3402 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405 				NUM_BANKS(ADDR_SURF_8_BANK));
3406 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3407 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3408 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3409 				NUM_BANKS(ADDR_SURF_16_BANK));
3410 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3411 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3412 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413 				NUM_BANKS(ADDR_SURF_16_BANK));
3414 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3415 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3416 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417 				 NUM_BANKS(ADDR_SURF_16_BANK));
3418 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3419 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3420 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421 				 NUM_BANKS(ADDR_SURF_16_BANK));
3422 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3423 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3424 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425 				 NUM_BANKS(ADDR_SURF_16_BANK));
3426 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3428 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429 				 NUM_BANKS(ADDR_SURF_16_BANK));
3430 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3433 				 NUM_BANKS(ADDR_SURF_8_BANK));
3434 
3435 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3436 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3437 			    reg_offset != 23)
3438 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3439 
3440 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3441 			if (reg_offset != 7)
3442 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3443 
3444 		break;
3445 	}
3446 }
3447 
3448 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3449 				  u32 se_num, u32 sh_num, u32 instance)
3450 {
3451 	u32 data;
3452 
3453 	if (instance == 0xffffffff)
3454 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3455 	else
3456 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3457 
3458 	if (se_num == 0xffffffff)
3459 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3460 	else
3461 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3462 
3463 	if (sh_num == 0xffffffff)
3464 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3465 	else
3466 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3467 
3468 	WREG32(mmGRBM_GFX_INDEX, data);
3469 }
3470 
3471 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3472 				  u32 me, u32 pipe, u32 q, u32 vm)
3473 {
3474 	vi_srbm_select(adev, me, pipe, q, vm);
3475 }
3476 
3477 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3478 {
3479 	u32 data, mask;
3480 
3481 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3482 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3483 
3484 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3485 
3486 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3487 					 adev->gfx.config.max_sh_per_se);
3488 
3489 	return (~data) & mask;
3490 }
3491 
3492 static void
3493 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3494 {
3495 	switch (adev->asic_type) {
3496 	case CHIP_FIJI:
3497 	case CHIP_VEGAM:
3498 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3499 			  RB_XSEL2(1) | PKR_MAP(2) |
3500 			  PKR_XSEL(1) | PKR_YSEL(1) |
3501 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3502 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3503 			   SE_PAIR_YSEL(2);
3504 		break;
3505 	case CHIP_TONGA:
3506 	case CHIP_POLARIS10:
3507 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3508 			  SE_XSEL(1) | SE_YSEL(1);
3509 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3510 			   SE_PAIR_YSEL(2);
3511 		break;
3512 	case CHIP_TOPAZ:
3513 	case CHIP_CARRIZO:
3514 		*rconf |= RB_MAP_PKR0(2);
3515 		*rconf1 |= 0x0;
3516 		break;
3517 	case CHIP_POLARIS11:
3518 	case CHIP_POLARIS12:
3519 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3520 			  SE_XSEL(1) | SE_YSEL(1);
3521 		*rconf1 |= 0x0;
3522 		break;
3523 	case CHIP_STONEY:
3524 		*rconf |= 0x0;
3525 		*rconf1 |= 0x0;
3526 		break;
3527 	default:
3528 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3529 		break;
3530 	}
3531 }
3532 
3533 static void
3534 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3535 					u32 raster_config, u32 raster_config_1,
3536 					unsigned rb_mask, unsigned num_rb)
3537 {
3538 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3539 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3540 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3541 	unsigned rb_per_se = num_rb / num_se;
3542 	unsigned se_mask[4];
3543 	unsigned se;
3544 
3545 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3546 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3547 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3548 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3549 
3550 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3551 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3552 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3553 
3554 	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3555 			     (!se_mask[2] && !se_mask[3]))) {
3556 		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3557 
3558 		if (!se_mask[0] && !se_mask[1]) {
3559 			raster_config_1 |=
3560 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3561 		} else {
3562 			raster_config_1 |=
3563 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3564 		}
3565 	}
3566 
3567 	for (se = 0; se < num_se; se++) {
3568 		unsigned raster_config_se = raster_config;
3569 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3570 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3571 		int idx = (se / 2) * 2;
3572 
3573 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3574 			raster_config_se &= ~SE_MAP_MASK;
3575 
3576 			if (!se_mask[idx]) {
3577 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3578 			} else {
3579 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3580 			}
3581 		}
3582 
3583 		pkr0_mask &= rb_mask;
3584 		pkr1_mask &= rb_mask;
3585 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3586 			raster_config_se &= ~PKR_MAP_MASK;
3587 
3588 			if (!pkr0_mask) {
3589 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3590 			} else {
3591 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3592 			}
3593 		}
3594 
3595 		if (rb_per_se >= 2) {
3596 			unsigned rb0_mask = 1 << (se * rb_per_se);
3597 			unsigned rb1_mask = rb0_mask << 1;
3598 
3599 			rb0_mask &= rb_mask;
3600 			rb1_mask &= rb_mask;
3601 			if (!rb0_mask || !rb1_mask) {
3602 				raster_config_se &= ~RB_MAP_PKR0_MASK;
3603 
3604 				if (!rb0_mask) {
3605 					raster_config_se |=
3606 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3607 				} else {
3608 					raster_config_se |=
3609 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3610 				}
3611 			}
3612 
3613 			if (rb_per_se > 2) {
3614 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3615 				rb1_mask = rb0_mask << 1;
3616 				rb0_mask &= rb_mask;
3617 				rb1_mask &= rb_mask;
3618 				if (!rb0_mask || !rb1_mask) {
3619 					raster_config_se &= ~RB_MAP_PKR1_MASK;
3620 
3621 					if (!rb0_mask) {
3622 						raster_config_se |=
3623 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3624 					} else {
3625 						raster_config_se |=
3626 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3627 					}
3628 				}
3629 			}
3630 		}
3631 
3632 		/* GRBM_GFX_INDEX has a different offset on VI */
3633 		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3634 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3635 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3636 	}
3637 
3638 	/* GRBM_GFX_INDEX has a different offset on VI */
3639 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3640 }
3641 
3642 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3643 {
3644 	int i, j;
3645 	u32 data;
3646 	u32 raster_config = 0, raster_config_1 = 0;
3647 	u32 active_rbs = 0;
3648 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3649 					adev->gfx.config.max_sh_per_se;
3650 	unsigned num_rb_pipes;
3651 
3652 	mutex_lock(&adev->grbm_idx_mutex);
3653 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3654 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3655 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3656 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3657 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3658 					       rb_bitmap_width_per_sh);
3659 		}
3660 	}
3661 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3662 
3663 	adev->gfx.config.backend_enable_mask = active_rbs;
3664 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3665 
3666 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3667 			     adev->gfx.config.max_shader_engines, 16);
3668 
3669 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3670 
3671 	if (!adev->gfx.config.backend_enable_mask ||
3672 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3673 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3674 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3675 	} else {
3676 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3677 							adev->gfx.config.backend_enable_mask,
3678 							num_rb_pipes);
3679 	}
3680 
3681 	/* cache the values for userspace */
3682 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3683 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3684 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3685 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3686 				RREG32(mmCC_RB_BACKEND_DISABLE);
3687 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3688 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3689 			adev->gfx.config.rb_config[i][j].raster_config =
3690 				RREG32(mmPA_SC_RASTER_CONFIG);
3691 			adev->gfx.config.rb_config[i][j].raster_config_1 =
3692 				RREG32(mmPA_SC_RASTER_CONFIG_1);
3693 		}
3694 	}
3695 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3696 	mutex_unlock(&adev->grbm_idx_mutex);
3697 }
3698 
3699 /**
3700  * gfx_v8_0_init_compute_vmid - gart enable
3701  *
3702  * @adev: amdgpu_device pointer
3703  *
3704  * Initialize compute vmid sh_mem registers
3705  *
3706  */
3707 #define DEFAULT_SH_MEM_BASES	(0x6000)
3708 #define FIRST_COMPUTE_VMID	(8)
3709 #define LAST_COMPUTE_VMID	(16)
3710 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3711 {
3712 	int i;
3713 	uint32_t sh_mem_config;
3714 	uint32_t sh_mem_bases;
3715 
3716 	/*
3717 	 * Configure apertures:
3718 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3719 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3720 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3721 	 */
3722 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3723 
3724 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3725 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3726 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3727 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3728 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3729 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3730 
3731 	mutex_lock(&adev->srbm_mutex);
3732 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3733 		vi_srbm_select(adev, 0, 0, 0, i);
3734 		/* CP and shaders */
3735 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3736 		WREG32(mmSH_MEM_APE1_BASE, 1);
3737 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3738 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3739 	}
3740 	vi_srbm_select(adev, 0, 0, 0, 0);
3741 	mutex_unlock(&adev->srbm_mutex);
3742 
3743 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
3744 	   acccess. These should be enabled by FW for target VMIDs. */
3745 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3746 		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3747 		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3748 		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3749 		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3750 	}
3751 }
3752 
3753 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3754 {
3755 	int vmid;
3756 
3757 	/*
3758 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3759 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3760 	 * the driver can enable them for graphics. VMID0 should maintain
3761 	 * access so that HWS firmware can save/restore entries.
3762 	 */
3763 	for (vmid = 1; vmid < 16; vmid++) {
3764 		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3765 		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3766 		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3767 		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3768 	}
3769 }
3770 
3771 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3772 {
3773 	switch (adev->asic_type) {
3774 	default:
3775 		adev->gfx.config.double_offchip_lds_buf = 1;
3776 		break;
3777 	case CHIP_CARRIZO:
3778 	case CHIP_STONEY:
3779 		adev->gfx.config.double_offchip_lds_buf = 0;
3780 		break;
3781 	}
3782 }
3783 
3784 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3785 {
3786 	u32 tmp, sh_static_mem_cfg;
3787 	int i;
3788 
3789 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3790 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3791 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3792 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3793 
3794 	gfx_v8_0_tiling_mode_table_init(adev);
3795 	gfx_v8_0_setup_rb(adev);
3796 	gfx_v8_0_get_cu_info(adev);
3797 	gfx_v8_0_config_init(adev);
3798 
3799 	/* XXX SH_MEM regs */
3800 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3801 	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3802 				   SWIZZLE_ENABLE, 1);
3803 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3804 				   ELEMENT_SIZE, 1);
3805 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3806 				   INDEX_STRIDE, 3);
3807 	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3808 
3809 	mutex_lock(&adev->srbm_mutex);
3810 	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3811 		vi_srbm_select(adev, 0, 0, 0, i);
3812 		/* CP and shaders */
3813 		if (i == 0) {
3814 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3815 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3816 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3817 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3818 			WREG32(mmSH_MEM_CONFIG, tmp);
3819 			WREG32(mmSH_MEM_BASES, 0);
3820 		} else {
3821 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3822 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3823 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3824 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3825 			WREG32(mmSH_MEM_CONFIG, tmp);
3826 			tmp = adev->gmc.shared_aperture_start >> 48;
3827 			WREG32(mmSH_MEM_BASES, tmp);
3828 		}
3829 
3830 		WREG32(mmSH_MEM_APE1_BASE, 1);
3831 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3832 	}
3833 	vi_srbm_select(adev, 0, 0, 0, 0);
3834 	mutex_unlock(&adev->srbm_mutex);
3835 
3836 	gfx_v8_0_init_compute_vmid(adev);
3837 	gfx_v8_0_init_gds_vmid(adev);
3838 
3839 	mutex_lock(&adev->grbm_idx_mutex);
3840 	/*
3841 	 * making sure that the following register writes will be broadcasted
3842 	 * to all the shaders
3843 	 */
3844 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3845 
3846 	WREG32(mmPA_SC_FIFO_SIZE,
3847 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3848 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3849 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3850 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3851 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3852 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3853 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3854 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3855 
3856 	tmp = RREG32(mmSPI_ARB_PRIORITY);
3857 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3858 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3859 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3860 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3861 	WREG32(mmSPI_ARB_PRIORITY, tmp);
3862 
3863 	mutex_unlock(&adev->grbm_idx_mutex);
3864 
3865 }
3866 
3867 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3868 {
3869 	u32 i, j, k;
3870 	u32 mask;
3871 
3872 	mutex_lock(&adev->grbm_idx_mutex);
3873 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3874 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3875 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3876 			for (k = 0; k < adev->usec_timeout; k++) {
3877 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3878 					break;
3879 				udelay(1);
3880 			}
3881 			if (k == adev->usec_timeout) {
3882 				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3883 						      0xffffffff, 0xffffffff);
3884 				mutex_unlock(&adev->grbm_idx_mutex);
3885 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3886 					 i, j);
3887 				return;
3888 			}
3889 		}
3890 	}
3891 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3892 	mutex_unlock(&adev->grbm_idx_mutex);
3893 
3894 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3895 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3896 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3897 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3898 	for (k = 0; k < adev->usec_timeout; k++) {
3899 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3900 			break;
3901 		udelay(1);
3902 	}
3903 }
3904 
3905 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3906 					       bool enable)
3907 {
3908 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3909 
3910 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3911 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3912 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3913 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3914 
3915 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3916 }
3917 
3918 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3919 {
3920 	/* csib */
3921 	WREG32(mmRLC_CSIB_ADDR_HI,
3922 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3923 	WREG32(mmRLC_CSIB_ADDR_LO,
3924 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3925 	WREG32(mmRLC_CSIB_LENGTH,
3926 			adev->gfx.rlc.clear_state_size);
3927 }
3928 
3929 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3930 				int ind_offset,
3931 				int list_size,
3932 				int *unique_indices,
3933 				int *indices_count,
3934 				int max_indices,
3935 				int *ind_start_offsets,
3936 				int *offset_count,
3937 				int max_offset)
3938 {
3939 	int indices;
3940 	bool new_entry = true;
3941 
3942 	for (; ind_offset < list_size; ind_offset++) {
3943 
3944 		if (new_entry) {
3945 			new_entry = false;
3946 			ind_start_offsets[*offset_count] = ind_offset;
3947 			*offset_count = *offset_count + 1;
3948 			BUG_ON(*offset_count >= max_offset);
3949 		}
3950 
3951 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3952 			new_entry = true;
3953 			continue;
3954 		}
3955 
3956 		ind_offset += 2;
3957 
3958 		/* look for the matching indice */
3959 		for (indices = 0;
3960 			indices < *indices_count;
3961 			indices++) {
3962 			if (unique_indices[indices] ==
3963 				register_list_format[ind_offset])
3964 				break;
3965 		}
3966 
3967 		if (indices >= *indices_count) {
3968 			unique_indices[*indices_count] =
3969 				register_list_format[ind_offset];
3970 			indices = *indices_count;
3971 			*indices_count = *indices_count + 1;
3972 			BUG_ON(*indices_count >= max_indices);
3973 		}
3974 
3975 		register_list_format[ind_offset] = indices;
3976 	}
3977 }
3978 
3979 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3980 {
3981 	int i, temp, data;
3982 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3983 	int indices_count = 0;
3984 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3985 	int offset_count = 0;
3986 
3987 	int list_size;
3988 	unsigned int *register_list_format =
3989 		kmemdup(adev->gfx.rlc.register_list_format,
3990 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3991 	if (!register_list_format)
3992 		return -ENOMEM;
3993 
3994 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3995 				RLC_FormatDirectRegListLength,
3996 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3997 				unique_indices,
3998 				&indices_count,
3999 				ARRAY_SIZE(unique_indices),
4000 				indirect_start_offsets,
4001 				&offset_count,
4002 				ARRAY_SIZE(indirect_start_offsets));
4003 
4004 	/* save and restore list */
4005 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4006 
4007 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4008 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4009 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4010 
4011 	/* indirect list */
4012 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4013 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4014 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4015 
4016 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4017 	list_size = list_size >> 1;
4018 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4019 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4020 
4021 	/* starting offsets starts */
4022 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
4023 		adev->gfx.rlc.starting_offsets_start);
4024 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4025 		WREG32(mmRLC_GPM_SCRATCH_DATA,
4026 				indirect_start_offsets[i]);
4027 
4028 	/* unique indices */
4029 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4030 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4031 	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4032 		if (unique_indices[i] != 0) {
4033 			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4034 			WREG32(data + i, unique_indices[i] >> 20);
4035 		}
4036 	}
4037 	kfree(register_list_format);
4038 
4039 	return 0;
4040 }
4041 
4042 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4043 {
4044 	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4045 }
4046 
4047 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4048 {
4049 	uint32_t data;
4050 
4051 	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4052 
4053 	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4054 	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4055 	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4056 	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4057 	WREG32(mmRLC_PG_DELAY, data);
4058 
4059 	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4060 	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4061 
4062 }
4063 
4064 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4065 						bool enable)
4066 {
4067 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4068 }
4069 
4070 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4071 						  bool enable)
4072 {
4073 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4074 }
4075 
4076 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4077 {
4078 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4079 }
4080 
4081 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4082 {
4083 	if ((adev->asic_type == CHIP_CARRIZO) ||
4084 	    (adev->asic_type == CHIP_STONEY)) {
4085 		gfx_v8_0_init_csb(adev);
4086 		gfx_v8_0_init_save_restore_list(adev);
4087 		gfx_v8_0_enable_save_restore_machine(adev);
4088 		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4089 		gfx_v8_0_init_power_gating(adev);
4090 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4091 	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4092 		   (adev->asic_type == CHIP_POLARIS12) ||
4093 		   (adev->asic_type == CHIP_VEGAM)) {
4094 		gfx_v8_0_init_csb(adev);
4095 		gfx_v8_0_init_save_restore_list(adev);
4096 		gfx_v8_0_enable_save_restore_machine(adev);
4097 		gfx_v8_0_init_power_gating(adev);
4098 	}
4099 
4100 }
4101 
4102 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4103 {
4104 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4105 
4106 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4107 	gfx_v8_0_wait_for_rlc_serdes(adev);
4108 }
4109 
4110 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4111 {
4112 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4113 	udelay(50);
4114 
4115 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4116 	udelay(50);
4117 }
4118 
4119 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4120 {
4121 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4122 
4123 	/* carrizo do enable cp interrupt after cp inited */
4124 	if (!(adev->flags & AMD_IS_APU))
4125 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4126 
4127 	udelay(50);
4128 }
4129 
4130 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4131 {
4132 	if (amdgpu_sriov_vf(adev)) {
4133 		gfx_v8_0_init_csb(adev);
4134 		return 0;
4135 	}
4136 
4137 	adev->gfx.rlc.funcs->stop(adev);
4138 	adev->gfx.rlc.funcs->reset(adev);
4139 	gfx_v8_0_init_pg(adev);
4140 	adev->gfx.rlc.funcs->start(adev);
4141 
4142 	return 0;
4143 }
4144 
4145 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4146 {
4147 	int i;
4148 	u32 tmp = RREG32(mmCP_ME_CNTL);
4149 
4150 	if (enable) {
4151 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4152 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4153 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4154 	} else {
4155 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4156 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4157 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4158 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4159 			adev->gfx.gfx_ring[i].sched.ready = false;
4160 	}
4161 	WREG32(mmCP_ME_CNTL, tmp);
4162 	udelay(50);
4163 }
4164 
4165 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4166 {
4167 	u32 count = 0;
4168 	const struct cs_section_def *sect = NULL;
4169 	const struct cs_extent_def *ext = NULL;
4170 
4171 	/* begin clear state */
4172 	count += 2;
4173 	/* context control state */
4174 	count += 3;
4175 
4176 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4177 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4178 			if (sect->id == SECT_CONTEXT)
4179 				count += 2 + ext->reg_count;
4180 			else
4181 				return 0;
4182 		}
4183 	}
4184 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4185 	count += 4;
4186 	/* end clear state */
4187 	count += 2;
4188 	/* clear state */
4189 	count += 2;
4190 
4191 	return count;
4192 }
4193 
4194 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4195 {
4196 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4197 	const struct cs_section_def *sect = NULL;
4198 	const struct cs_extent_def *ext = NULL;
4199 	int r, i;
4200 
4201 	/* init the CP */
4202 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4203 	WREG32(mmCP_ENDIAN_SWAP, 0);
4204 	WREG32(mmCP_DEVICE_ID, 1);
4205 
4206 	gfx_v8_0_cp_gfx_enable(adev, true);
4207 
4208 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4209 	if (r) {
4210 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4211 		return r;
4212 	}
4213 
4214 	/* clear state buffer */
4215 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4216 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4217 
4218 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4219 	amdgpu_ring_write(ring, 0x80000000);
4220 	amdgpu_ring_write(ring, 0x80000000);
4221 
4222 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4223 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4224 			if (sect->id == SECT_CONTEXT) {
4225 				amdgpu_ring_write(ring,
4226 				       PACKET3(PACKET3_SET_CONTEXT_REG,
4227 					       ext->reg_count));
4228 				amdgpu_ring_write(ring,
4229 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4230 				for (i = 0; i < ext->reg_count; i++)
4231 					amdgpu_ring_write(ring, ext->extent[i]);
4232 			}
4233 		}
4234 	}
4235 
4236 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4237 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4238 	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4239 	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4240 
4241 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4242 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4243 
4244 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4245 	amdgpu_ring_write(ring, 0);
4246 
4247 	/* init the CE partitions */
4248 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4249 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4250 	amdgpu_ring_write(ring, 0x8000);
4251 	amdgpu_ring_write(ring, 0x8000);
4252 
4253 	amdgpu_ring_commit(ring);
4254 
4255 	return 0;
4256 }
4257 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4258 {
4259 	u32 tmp;
4260 	/* no gfx doorbells on iceland */
4261 	if (adev->asic_type == CHIP_TOPAZ)
4262 		return;
4263 
4264 	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4265 
4266 	if (ring->use_doorbell) {
4267 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4268 				DOORBELL_OFFSET, ring->doorbell_index);
4269 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4270 						DOORBELL_HIT, 0);
4271 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4272 					    DOORBELL_EN, 1);
4273 	} else {
4274 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4275 	}
4276 
4277 	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4278 
4279 	if (adev->flags & AMD_IS_APU)
4280 		return;
4281 
4282 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4283 					DOORBELL_RANGE_LOWER,
4284 					adev->doorbell_index.gfx_ring0);
4285 	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4286 
4287 	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4288 		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4289 }
4290 
4291 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4292 {
4293 	struct amdgpu_ring *ring;
4294 	u32 tmp;
4295 	u32 rb_bufsz;
4296 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4297 
4298 	/* Set the write pointer delay */
4299 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4300 
4301 	/* set the RB to use vmid 0 */
4302 	WREG32(mmCP_RB_VMID, 0);
4303 
4304 	/* Set ring buffer size */
4305 	ring = &adev->gfx.gfx_ring[0];
4306 	rb_bufsz = order_base_2(ring->ring_size / 8);
4307 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4308 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4309 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4310 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4311 #ifdef __BIG_ENDIAN
4312 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4313 #endif
4314 	WREG32(mmCP_RB0_CNTL, tmp);
4315 
4316 	/* Initialize the ring buffer's read and write pointers */
4317 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4318 	ring->wptr = 0;
4319 	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4320 
4321 	/* set the wb address wether it's enabled or not */
4322 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4323 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4324 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4325 
4326 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4327 	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4328 	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4329 	mdelay(1);
4330 	WREG32(mmCP_RB0_CNTL, tmp);
4331 
4332 	rb_addr = ring->gpu_addr >> 8;
4333 	WREG32(mmCP_RB0_BASE, rb_addr);
4334 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4335 
4336 	gfx_v8_0_set_cpg_door_bell(adev, ring);
4337 	/* start the ring */
4338 	amdgpu_ring_clear_ring(ring);
4339 	gfx_v8_0_cp_gfx_start(adev);
4340 	ring->sched.ready = true;
4341 
4342 	return 0;
4343 }
4344 
4345 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4346 {
4347 	int i;
4348 
4349 	if (enable) {
4350 		WREG32(mmCP_MEC_CNTL, 0);
4351 	} else {
4352 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4353 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4354 			adev->gfx.compute_ring[i].sched.ready = false;
4355 		adev->gfx.kiq.ring.sched.ready = false;
4356 	}
4357 	udelay(50);
4358 }
4359 
4360 /* KIQ functions */
4361 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4362 {
4363 	uint32_t tmp;
4364 	struct amdgpu_device *adev = ring->adev;
4365 
4366 	/* tell RLC which is KIQ queue */
4367 	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4368 	tmp &= 0xffffff00;
4369 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4370 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4371 	tmp |= 0x80;
4372 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4373 }
4374 
4375 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4376 {
4377 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4378 	uint64_t queue_mask = 0;
4379 	int r, i;
4380 
4381 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4382 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4383 			continue;
4384 
4385 		/* This situation may be hit in the future if a new HW
4386 		 * generation exposes more than 64 queues. If so, the
4387 		 * definition of queue_mask needs updating */
4388 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4389 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4390 			break;
4391 		}
4392 
4393 		queue_mask |= (1ull << i);
4394 	}
4395 
4396 	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4397 	if (r) {
4398 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4399 		return r;
4400 	}
4401 	/* set resources */
4402 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4403 	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4404 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4405 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4406 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4407 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4408 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4409 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4410 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4411 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4412 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4413 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4414 
4415 		/* map queues */
4416 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4417 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4418 		amdgpu_ring_write(kiq_ring,
4419 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4420 		amdgpu_ring_write(kiq_ring,
4421 				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4422 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4423 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4424 				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4425 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4426 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4427 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4428 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4429 	}
4430 
4431 	amdgpu_ring_commit(kiq_ring);
4432 
4433 	return 0;
4434 }
4435 
4436 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4437 {
4438 	int i, r = 0;
4439 
4440 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4441 		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4442 		for (i = 0; i < adev->usec_timeout; i++) {
4443 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4444 				break;
4445 			udelay(1);
4446 		}
4447 		if (i == adev->usec_timeout)
4448 			r = -ETIMEDOUT;
4449 	}
4450 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4451 	WREG32(mmCP_HQD_PQ_RPTR, 0);
4452 	WREG32(mmCP_HQD_PQ_WPTR, 0);
4453 
4454 	return r;
4455 }
4456 
4457 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4458 {
4459 	struct amdgpu_device *adev = ring->adev;
4460 	struct vi_mqd *mqd = ring->mqd_ptr;
4461 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4462 	uint32_t tmp;
4463 
4464 	mqd->header = 0xC0310800;
4465 	mqd->compute_pipelinestat_enable = 0x00000001;
4466 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4467 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4468 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4469 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4470 	mqd->compute_misc_reserved = 0x00000003;
4471 	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4472 						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4473 	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4474 						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4475 	eop_base_addr = ring->eop_gpu_addr >> 8;
4476 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4477 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4478 
4479 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4480 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4481 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4482 			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4483 
4484 	mqd->cp_hqd_eop_control = tmp;
4485 
4486 	/* enable doorbell? */
4487 	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4488 			    CP_HQD_PQ_DOORBELL_CONTROL,
4489 			    DOORBELL_EN,
4490 			    ring->use_doorbell ? 1 : 0);
4491 
4492 	mqd->cp_hqd_pq_doorbell_control = tmp;
4493 
4494 	/* set the pointer to the MQD */
4495 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4496 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4497 
4498 	/* set MQD vmid to 0 */
4499 	tmp = RREG32(mmCP_MQD_CONTROL);
4500 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4501 	mqd->cp_mqd_control = tmp;
4502 
4503 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4504 	hqd_gpu_addr = ring->gpu_addr >> 8;
4505 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4506 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4507 
4508 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4509 	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4510 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4511 			    (order_base_2(ring->ring_size / 4) - 1));
4512 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4513 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4514 #ifdef __BIG_ENDIAN
4515 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4516 #endif
4517 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4518 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4519 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4520 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4521 	mqd->cp_hqd_pq_control = tmp;
4522 
4523 	/* set the wb address whether it's enabled or not */
4524 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4525 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4526 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4527 		upper_32_bits(wb_gpu_addr) & 0xffff;
4528 
4529 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4530 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4531 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4532 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4533 
4534 	tmp = 0;
4535 	/* enable the doorbell if requested */
4536 	if (ring->use_doorbell) {
4537 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4538 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4539 				DOORBELL_OFFSET, ring->doorbell_index);
4540 
4541 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4542 					 DOORBELL_EN, 1);
4543 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4544 					 DOORBELL_SOURCE, 0);
4545 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4546 					 DOORBELL_HIT, 0);
4547 	}
4548 
4549 	mqd->cp_hqd_pq_doorbell_control = tmp;
4550 
4551 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4552 	ring->wptr = 0;
4553 	mqd->cp_hqd_pq_wptr = ring->wptr;
4554 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4555 
4556 	/* set the vmid for the queue */
4557 	mqd->cp_hqd_vmid = 0;
4558 
4559 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4560 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4561 	mqd->cp_hqd_persistent_state = tmp;
4562 
4563 	/* set MTYPE */
4564 	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4565 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4566 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4567 	mqd->cp_hqd_ib_control = tmp;
4568 
4569 	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4570 	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4571 	mqd->cp_hqd_iq_timer = tmp;
4572 
4573 	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4574 	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4575 	mqd->cp_hqd_ctx_save_control = tmp;
4576 
4577 	/* defaults */
4578 	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4579 	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4580 	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4581 	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4582 	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4583 	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4584 	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4585 	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4586 	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4587 	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4588 	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4589 	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4590 	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4591 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4592 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4593 
4594 	/* activate the queue */
4595 	mqd->cp_hqd_active = 1;
4596 
4597 	return 0;
4598 }
4599 
4600 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4601 			struct vi_mqd *mqd)
4602 {
4603 	uint32_t mqd_reg;
4604 	uint32_t *mqd_data;
4605 
4606 	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4607 	mqd_data = &mqd->cp_mqd_base_addr_lo;
4608 
4609 	/* disable wptr polling */
4610 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4611 
4612 	/* program all HQD registers */
4613 	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4614 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4615 
4616 	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4617 	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4618 	 * on ASICs that do not support context-save.
4619 	 * EOP writes/reads can start anywhere in the ring.
4620 	 */
4621 	if (adev->asic_type != CHIP_TONGA) {
4622 		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4623 		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4624 		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4625 	}
4626 
4627 	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4628 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4629 
4630 	/* activate the HQD */
4631 	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4632 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4633 
4634 	return 0;
4635 }
4636 
4637 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4638 {
4639 	struct amdgpu_device *adev = ring->adev;
4640 	struct vi_mqd *mqd = ring->mqd_ptr;
4641 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4642 
4643 	gfx_v8_0_kiq_setting(ring);
4644 
4645 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
4646 		/* reset MQD to a clean status */
4647 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4648 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4649 
4650 		/* reset ring buffer */
4651 		ring->wptr = 0;
4652 		amdgpu_ring_clear_ring(ring);
4653 		mutex_lock(&adev->srbm_mutex);
4654 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4655 		gfx_v8_0_mqd_commit(adev, mqd);
4656 		vi_srbm_select(adev, 0, 0, 0, 0);
4657 		mutex_unlock(&adev->srbm_mutex);
4658 	} else {
4659 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4660 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4661 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4662 		mutex_lock(&adev->srbm_mutex);
4663 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4664 		gfx_v8_0_mqd_init(ring);
4665 		gfx_v8_0_mqd_commit(adev, mqd);
4666 		vi_srbm_select(adev, 0, 0, 0, 0);
4667 		mutex_unlock(&adev->srbm_mutex);
4668 
4669 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4670 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4671 	}
4672 
4673 	return 0;
4674 }
4675 
4676 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4677 {
4678 	struct amdgpu_device *adev = ring->adev;
4679 	struct vi_mqd *mqd = ring->mqd_ptr;
4680 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4681 
4682 	if (!adev->in_gpu_reset && !adev->in_suspend) {
4683 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4684 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4685 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4686 		mutex_lock(&adev->srbm_mutex);
4687 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4688 		gfx_v8_0_mqd_init(ring);
4689 		vi_srbm_select(adev, 0, 0, 0, 0);
4690 		mutex_unlock(&adev->srbm_mutex);
4691 
4692 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4693 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4694 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4695 		/* reset MQD to a clean status */
4696 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4697 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4698 		/* reset ring buffer */
4699 		ring->wptr = 0;
4700 		amdgpu_ring_clear_ring(ring);
4701 	} else {
4702 		amdgpu_ring_clear_ring(ring);
4703 	}
4704 	return 0;
4705 }
4706 
4707 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4708 {
4709 	if (adev->asic_type > CHIP_TONGA) {
4710 		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4711 		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4712 	}
4713 	/* enable doorbells */
4714 	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4715 }
4716 
4717 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4718 {
4719 	struct amdgpu_ring *ring;
4720 	int r;
4721 
4722 	ring = &adev->gfx.kiq.ring;
4723 
4724 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4725 	if (unlikely(r != 0))
4726 		return r;
4727 
4728 	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4729 	if (unlikely(r != 0))
4730 		return r;
4731 
4732 	gfx_v8_0_kiq_init_queue(ring);
4733 	amdgpu_bo_kunmap(ring->mqd_obj);
4734 	ring->mqd_ptr = NULL;
4735 	amdgpu_bo_unreserve(ring->mqd_obj);
4736 	ring->sched.ready = true;
4737 	return 0;
4738 }
4739 
4740 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4741 {
4742 	struct amdgpu_ring *ring = NULL;
4743 	int r = 0, i;
4744 
4745 	gfx_v8_0_cp_compute_enable(adev, true);
4746 
4747 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4748 		ring = &adev->gfx.compute_ring[i];
4749 
4750 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4751 		if (unlikely(r != 0))
4752 			goto done;
4753 		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4754 		if (!r) {
4755 			r = gfx_v8_0_kcq_init_queue(ring);
4756 			amdgpu_bo_kunmap(ring->mqd_obj);
4757 			ring->mqd_ptr = NULL;
4758 		}
4759 		amdgpu_bo_unreserve(ring->mqd_obj);
4760 		if (r)
4761 			goto done;
4762 	}
4763 
4764 	gfx_v8_0_set_mec_doorbell_range(adev);
4765 
4766 	r = gfx_v8_0_kiq_kcq_enable(adev);
4767 	if (r)
4768 		goto done;
4769 
4770 done:
4771 	return r;
4772 }
4773 
4774 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4775 {
4776 	int r, i;
4777 	struct amdgpu_ring *ring;
4778 
4779 	/* collect all the ring_tests here, gfx, kiq, compute */
4780 	ring = &adev->gfx.gfx_ring[0];
4781 	r = amdgpu_ring_test_helper(ring);
4782 	if (r)
4783 		return r;
4784 
4785 	ring = &adev->gfx.kiq.ring;
4786 	r = amdgpu_ring_test_helper(ring);
4787 	if (r)
4788 		return r;
4789 
4790 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4791 		ring = &adev->gfx.compute_ring[i];
4792 		amdgpu_ring_test_helper(ring);
4793 	}
4794 
4795 	return 0;
4796 }
4797 
4798 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4799 {
4800 	int r;
4801 
4802 	if (!(adev->flags & AMD_IS_APU))
4803 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4804 
4805 	r = gfx_v8_0_kiq_resume(adev);
4806 	if (r)
4807 		return r;
4808 
4809 	r = gfx_v8_0_cp_gfx_resume(adev);
4810 	if (r)
4811 		return r;
4812 
4813 	r = gfx_v8_0_kcq_resume(adev);
4814 	if (r)
4815 		return r;
4816 
4817 	r = gfx_v8_0_cp_test_all_rings(adev);
4818 	if (r)
4819 		return r;
4820 
4821 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4822 
4823 	return 0;
4824 }
4825 
4826 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4827 {
4828 	gfx_v8_0_cp_gfx_enable(adev, enable);
4829 	gfx_v8_0_cp_compute_enable(adev, enable);
4830 }
4831 
4832 static int gfx_v8_0_hw_init(void *handle)
4833 {
4834 	int r;
4835 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4836 
4837 	gfx_v8_0_init_golden_registers(adev);
4838 	gfx_v8_0_constants_init(adev);
4839 
4840 	r = gfx_v8_0_csb_vram_pin(adev);
4841 	if (r)
4842 		return r;
4843 
4844 	r = adev->gfx.rlc.funcs->resume(adev);
4845 	if (r)
4846 		return r;
4847 
4848 	r = gfx_v8_0_cp_resume(adev);
4849 
4850 	return r;
4851 }
4852 
4853 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4854 {
4855 	int r, i;
4856 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4857 
4858 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4859 	if (r)
4860 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4861 
4862 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4863 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4864 
4865 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4866 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4867 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4868 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4869 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4870 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4871 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4872 		amdgpu_ring_write(kiq_ring, 0);
4873 		amdgpu_ring_write(kiq_ring, 0);
4874 		amdgpu_ring_write(kiq_ring, 0);
4875 	}
4876 	r = amdgpu_ring_test_helper(kiq_ring);
4877 	if (r)
4878 		DRM_ERROR("KCQ disable failed\n");
4879 
4880 	return r;
4881 }
4882 
4883 static bool gfx_v8_0_is_idle(void *handle)
4884 {
4885 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4886 
4887 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4888 		|| RREG32(mmGRBM_STATUS2) != 0x8)
4889 		return false;
4890 	else
4891 		return true;
4892 }
4893 
4894 static bool gfx_v8_0_rlc_is_idle(void *handle)
4895 {
4896 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4897 
4898 	if (RREG32(mmGRBM_STATUS2) != 0x8)
4899 		return false;
4900 	else
4901 		return true;
4902 }
4903 
4904 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4905 {
4906 	unsigned int i;
4907 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4908 
4909 	for (i = 0; i < adev->usec_timeout; i++) {
4910 		if (gfx_v8_0_rlc_is_idle(handle))
4911 			return 0;
4912 
4913 		udelay(1);
4914 	}
4915 	return -ETIMEDOUT;
4916 }
4917 
4918 static int gfx_v8_0_wait_for_idle(void *handle)
4919 {
4920 	unsigned int i;
4921 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4922 
4923 	for (i = 0; i < adev->usec_timeout; i++) {
4924 		if (gfx_v8_0_is_idle(handle))
4925 			return 0;
4926 
4927 		udelay(1);
4928 	}
4929 	return -ETIMEDOUT;
4930 }
4931 
4932 static int gfx_v8_0_hw_fini(void *handle)
4933 {
4934 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4935 
4936 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4937 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4938 
4939 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4940 
4941 	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4942 
4943 	/* disable KCQ to avoid CPC touch memory not valid anymore */
4944 	gfx_v8_0_kcq_disable(adev);
4945 
4946 	if (amdgpu_sriov_vf(adev)) {
4947 		pr_debug("For SRIOV client, shouldn't do anything.\n");
4948 		return 0;
4949 	}
4950 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4951 	if (!gfx_v8_0_wait_for_idle(adev))
4952 		gfx_v8_0_cp_enable(adev, false);
4953 	else
4954 		pr_err("cp is busy, skip halt cp\n");
4955 	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4956 		adev->gfx.rlc.funcs->stop(adev);
4957 	else
4958 		pr_err("rlc is busy, skip halt rlc\n");
4959 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4960 
4961 	gfx_v8_0_csb_vram_unpin(adev);
4962 
4963 	return 0;
4964 }
4965 
/* Suspend is implemented as a plain hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	int ret = gfx_v8_0_hw_fini(handle);

	return ret;
}
4970 
/* Resume is implemented as a plain hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	int ret = gfx_v8_0_hw_init(handle);

	return ret;
}
4975 
4976 static bool gfx_v8_0_check_soft_reset(void *handle)
4977 {
4978 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4979 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4980 	u32 tmp;
4981 
4982 	/* GRBM_STATUS */
4983 	tmp = RREG32(mmGRBM_STATUS);
4984 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4985 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4986 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4987 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4988 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4989 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4990 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4991 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4992 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4993 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4994 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4995 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4996 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4997 	}
4998 
4999 	/* GRBM_STATUS2 */
5000 	tmp = RREG32(mmGRBM_STATUS2);
5001 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5002 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5003 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5004 
5005 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5006 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5007 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5008 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5009 						SOFT_RESET_CPF, 1);
5010 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5011 						SOFT_RESET_CPC, 1);
5012 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5013 						SOFT_RESET_CPG, 1);
5014 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5015 						SOFT_RESET_GRBM, 1);
5016 	}
5017 
5018 	/* SRBM_STATUS */
5019 	tmp = RREG32(mmSRBM_STATUS);
5020 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5021 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5022 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5023 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5024 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5025 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5026 
5027 	if (grbm_soft_reset || srbm_soft_reset) {
5028 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5029 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5030 		return true;
5031 	} else {
5032 		adev->gfx.grbm_soft_reset = 0;
5033 		adev->gfx.srbm_soft_reset = 0;
5034 		return false;
5035 	}
5036 }
5037 
5038 static int gfx_v8_0_pre_soft_reset(void *handle)
5039 {
5040 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5041 	u32 grbm_soft_reset = 0;
5042 
5043 	if ((!adev->gfx.grbm_soft_reset) &&
5044 	    (!adev->gfx.srbm_soft_reset))
5045 		return 0;
5046 
5047 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5048 
5049 	/* stop the rlc */
5050 	adev->gfx.rlc.funcs->stop(adev);
5051 
5052 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5053 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5054 		/* Disable GFX parsing/prefetching */
5055 		gfx_v8_0_cp_gfx_enable(adev, false);
5056 
5057 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5058 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5059 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5060 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5061 		int i;
5062 
5063 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5064 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5065 
5066 			mutex_lock(&adev->srbm_mutex);
5067 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5068 			gfx_v8_0_deactivate_hqd(adev, 2);
5069 			vi_srbm_select(adev, 0, 0, 0, 0);
5070 			mutex_unlock(&adev->srbm_mutex);
5071 		}
5072 		/* Disable MEC parsing/prefetching */
5073 		gfx_v8_0_cp_compute_enable(adev, false);
5074 	}
5075 
5076        return 0;
5077 }
5078 
5079 static int gfx_v8_0_soft_reset(void *handle)
5080 {
5081 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5082 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5083 	u32 tmp;
5084 
5085 	if ((!adev->gfx.grbm_soft_reset) &&
5086 	    (!adev->gfx.srbm_soft_reset))
5087 		return 0;
5088 
5089 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5090 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5091 
5092 	if (grbm_soft_reset || srbm_soft_reset) {
5093 		tmp = RREG32(mmGMCON_DEBUG);
5094 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5095 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5096 		WREG32(mmGMCON_DEBUG, tmp);
5097 		udelay(50);
5098 	}
5099 
5100 	if (grbm_soft_reset) {
5101 		tmp = RREG32(mmGRBM_SOFT_RESET);
5102 		tmp |= grbm_soft_reset;
5103 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5104 		WREG32(mmGRBM_SOFT_RESET, tmp);
5105 		tmp = RREG32(mmGRBM_SOFT_RESET);
5106 
5107 		udelay(50);
5108 
5109 		tmp &= ~grbm_soft_reset;
5110 		WREG32(mmGRBM_SOFT_RESET, tmp);
5111 		tmp = RREG32(mmGRBM_SOFT_RESET);
5112 	}
5113 
5114 	if (srbm_soft_reset) {
5115 		tmp = RREG32(mmSRBM_SOFT_RESET);
5116 		tmp |= srbm_soft_reset;
5117 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5118 		WREG32(mmSRBM_SOFT_RESET, tmp);
5119 		tmp = RREG32(mmSRBM_SOFT_RESET);
5120 
5121 		udelay(50);
5122 
5123 		tmp &= ~srbm_soft_reset;
5124 		WREG32(mmSRBM_SOFT_RESET, tmp);
5125 		tmp = RREG32(mmSRBM_SOFT_RESET);
5126 	}
5127 
5128 	if (grbm_soft_reset || srbm_soft_reset) {
5129 		tmp = RREG32(mmGMCON_DEBUG);
5130 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5131 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5132 		WREG32(mmGMCON_DEBUG, tmp);
5133 	}
5134 
5135 	/* Wait a little for things to settle down */
5136 	udelay(50);
5137 
5138 	return 0;
5139 }
5140 
5141 static int gfx_v8_0_post_soft_reset(void *handle)
5142 {
5143 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5144 	u32 grbm_soft_reset = 0;
5145 
5146 	if ((!adev->gfx.grbm_soft_reset) &&
5147 	    (!adev->gfx.srbm_soft_reset))
5148 		return 0;
5149 
5150 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5151 
5152 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5153 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5154 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5155 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5156 		int i;
5157 
5158 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5159 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5160 
5161 			mutex_lock(&adev->srbm_mutex);
5162 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5163 			gfx_v8_0_deactivate_hqd(adev, 2);
5164 			vi_srbm_select(adev, 0, 0, 0, 0);
5165 			mutex_unlock(&adev->srbm_mutex);
5166 		}
5167 		gfx_v8_0_kiq_resume(adev);
5168 		gfx_v8_0_kcq_resume(adev);
5169 	}
5170 
5171 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5172 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5173 		gfx_v8_0_cp_gfx_resume(adev);
5174 
5175 	gfx_v8_0_cp_test_all_rings(adev);
5176 
5177 	adev->gfx.rlc.funcs->start(adev);
5178 
5179 	return 0;
5180 }
5181 
5182 /**
5183  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5184  *
5185  * @adev: amdgpu_device pointer
5186  *
5187  * Fetches a GPU clock counter snapshot.
5188  * Returns the 64 bit clock counter snapshot.
5189  */
5190 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5191 {
5192 	uint64_t clock;
5193 
5194 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5195 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5196 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5197 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5198 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5199 	return clock;
5200 }
5201 
5202 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5203 					  uint32_t vmid,
5204 					  uint32_t gds_base, uint32_t gds_size,
5205 					  uint32_t gws_base, uint32_t gws_size,
5206 					  uint32_t oa_base, uint32_t oa_size)
5207 {
5208 	/* GDS Base */
5209 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5210 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5211 				WRITE_DATA_DST_SEL(0)));
5212 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5213 	amdgpu_ring_write(ring, 0);
5214 	amdgpu_ring_write(ring, gds_base);
5215 
5216 	/* GDS Size */
5217 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5218 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5219 				WRITE_DATA_DST_SEL(0)));
5220 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5221 	amdgpu_ring_write(ring, 0);
5222 	amdgpu_ring_write(ring, gds_size);
5223 
5224 	/* GWS */
5225 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5226 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5227 				WRITE_DATA_DST_SEL(0)));
5228 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5229 	amdgpu_ring_write(ring, 0);
5230 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5231 
5232 	/* OA */
5233 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5234 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5235 				WRITE_DATA_DST_SEL(0)));
5236 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5237 	amdgpu_ring_write(ring, 0);
5238 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5239 }
5240 
5241 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5242 {
5243 	WREG32(mmSQ_IND_INDEX,
5244 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5245 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5246 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5247 		(SQ_IND_INDEX__FORCE_READ_MASK));
5248 	return RREG32(mmSQ_IND_DATA);
5249 }
5250 
5251 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5252 			   uint32_t wave, uint32_t thread,
5253 			   uint32_t regno, uint32_t num, uint32_t *out)
5254 {
5255 	WREG32(mmSQ_IND_INDEX,
5256 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5257 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5258 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5259 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5260 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5261 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5262 	while (num--)
5263 		*(out++) = RREG32(mmSQ_IND_DATA);
5264 }
5265 
5266 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5267 {
5268 	/* type 0 wave data */
5269 	dst[(*no_fields)++] = 0;
5270 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5271 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5272 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5273 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5274 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5275 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5276 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5277 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5278 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5279 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5280 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5281 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5282 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5283 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5284 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5285 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5286 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5287 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5288 }
5289 
5290 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5291 				     uint32_t wave, uint32_t start,
5292 				     uint32_t size, uint32_t *dst)
5293 {
5294 	wave_read_regs(
5295 		adev, simd, wave, 0,
5296 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5297 }
5298 
5299 
5300 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5301 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5302 	.select_se_sh = &gfx_v8_0_select_se_sh,
5303 	.read_wave_data = &gfx_v8_0_read_wave_data,
5304 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5305 	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5306 };
5307 
5308 static int gfx_v8_0_early_init(void *handle)
5309 {
5310 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5311 
5312 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5313 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5314 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5315 	gfx_v8_0_set_ring_funcs(adev);
5316 	gfx_v8_0_set_irq_funcs(adev);
5317 	gfx_v8_0_set_gds_init(adev);
5318 	gfx_v8_0_set_rlc_funcs(adev);
5319 
5320 	return 0;
5321 }
5322 
5323 static int gfx_v8_0_late_init(void *handle)
5324 {
5325 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5326 	int r;
5327 
5328 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5329 	if (r)
5330 		return r;
5331 
5332 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5333 	if (r)
5334 		return r;
5335 
5336 	/* requires IBs so do in late init after IB pool is initialized */
5337 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5338 	if (r)
5339 		return r;
5340 
5341 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5342 	if (r) {
5343 		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5344 		return r;
5345 	}
5346 
5347 	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5348 	if (r) {
5349 		DRM_ERROR(
5350 			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5351 			r);
5352 		return r;
5353 	}
5354 
5355 	return 0;
5356 }
5357 
5358 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5359 						       bool enable)
5360 {
5361 	if (((adev->asic_type == CHIP_POLARIS11) ||
5362 	    (adev->asic_type == CHIP_POLARIS12) ||
5363 	    (adev->asic_type == CHIP_VEGAM)) &&
5364 	    adev->powerplay.pp_funcs->set_powergating_by_smu)
5365 		/* Send msg to SMU via Powerplay */
5366 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5367 
5368 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5369 }
5370 
5371 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5372 							bool enable)
5373 {
5374 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5375 }
5376 
5377 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5378 		bool enable)
5379 {
5380 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5381 }
5382 
5383 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5384 					  bool enable)
5385 {
5386 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5387 }
5388 
5389 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5390 						bool enable)
5391 {
5392 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5393 
5394 	/* Read any GFX register to wake up GFX. */
5395 	if (!enable)
5396 		RREG32(mmDB_RENDER_CONTROL);
5397 }
5398 
5399 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5400 					  bool enable)
5401 {
5402 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5403 		cz_enable_gfx_cg_power_gating(adev, true);
5404 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5405 			cz_enable_gfx_pipeline_power_gating(adev, true);
5406 	} else {
5407 		cz_enable_gfx_cg_power_gating(adev, false);
5408 		cz_enable_gfx_pipeline_power_gating(adev, false);
5409 	}
5410 }
5411 
5412 static int gfx_v8_0_set_powergating_state(void *handle,
5413 					  enum amd_powergating_state state)
5414 {
5415 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5416 	bool enable = (state == AMD_PG_STATE_GATE);
5417 
5418 	if (amdgpu_sriov_vf(adev))
5419 		return 0;
5420 
5421 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5422 				AMD_PG_SUPPORT_RLC_SMU_HS |
5423 				AMD_PG_SUPPORT_CP |
5424 				AMD_PG_SUPPORT_GFX_DMG))
5425 		amdgpu_gfx_rlc_enter_safe_mode(adev);
5426 	switch (adev->asic_type) {
5427 	case CHIP_CARRIZO:
5428 	case CHIP_STONEY:
5429 
5430 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5431 			cz_enable_sck_slow_down_on_power_up(adev, true);
5432 			cz_enable_sck_slow_down_on_power_down(adev, true);
5433 		} else {
5434 			cz_enable_sck_slow_down_on_power_up(adev, false);
5435 			cz_enable_sck_slow_down_on_power_down(adev, false);
5436 		}
5437 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5438 			cz_enable_cp_power_gating(adev, true);
5439 		else
5440 			cz_enable_cp_power_gating(adev, false);
5441 
5442 		cz_update_gfx_cg_power_gating(adev, enable);
5443 
5444 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5445 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5446 		else
5447 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5448 
5449 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5450 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5451 		else
5452 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5453 		break;
5454 	case CHIP_POLARIS11:
5455 	case CHIP_POLARIS12:
5456 	case CHIP_VEGAM:
5457 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5458 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5459 		else
5460 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5461 
5462 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5463 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5464 		else
5465 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5466 
5467 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5468 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5469 		else
5470 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5471 		break;
5472 	default:
5473 		break;
5474 	}
5475 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5476 				AMD_PG_SUPPORT_RLC_SMU_HS |
5477 				AMD_PG_SUPPORT_CP |
5478 				AMD_PG_SUPPORT_GFX_DMG))
5479 		amdgpu_gfx_rlc_exit_safe_mode(adev);
5480 	return 0;
5481 }
5482 
5483 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5484 {
5485 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5486 	int data;
5487 
5488 	if (amdgpu_sriov_vf(adev))
5489 		*flags = 0;
5490 
5491 	/* AMD_CG_SUPPORT_GFX_MGCG */
5492 	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5493 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5494 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5495 
5496 	/* AMD_CG_SUPPORT_GFX_CGLG */
5497 	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5498 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5499 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5500 
5501 	/* AMD_CG_SUPPORT_GFX_CGLS */
5502 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5503 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5504 
5505 	/* AMD_CG_SUPPORT_GFX_CGTS */
5506 	data = RREG32(mmCGTS_SM_CTRL_REG);
5507 	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5508 		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5509 
5510 	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5511 	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5512 		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5513 
5514 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5515 	data = RREG32(mmRLC_MEM_SLP_CNTL);
5516 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5517 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5518 
5519 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5520 	data = RREG32(mmCP_MEM_SLP_CNTL);
5521 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5522 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5523 }
5524 
5525 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5526 				     uint32_t reg_addr, uint32_t cmd)
5527 {
5528 	uint32_t data;
5529 
5530 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5531 
5532 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5533 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5534 
5535 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5536 	if (adev->asic_type == CHIP_STONEY)
5537 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5538 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5539 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5540 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5541 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5542 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5543 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5544 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5545 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5546 	else
5547 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5548 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5549 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5550 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5551 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5552 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5553 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5554 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5555 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5556 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5557 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5558 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5559 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5560 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5561 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5562 
5563 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5564 }
5565 
/*
 * Message values for entering/exiting RLC safe mode, and the field layout of
 * RLC_GPR_REG2: REQ is bit 0 (mask 0x1, shift 0) and MESSAGE is bits 1-4
 * (mask 0x1e, shift 1).
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5572 
5573 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5574 {
5575 	uint32_t rlc_setting;
5576 
5577 	rlc_setting = RREG32(mmRLC_CNTL);
5578 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5579 		return false;
5580 
5581 	return true;
5582 }
5583 
5584 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5585 {
5586 	uint32_t data;
5587 	unsigned i;
5588 	data = RREG32(mmRLC_CNTL);
5589 	data |= RLC_SAFE_MODE__CMD_MASK;
5590 	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5591 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5592 	WREG32(mmRLC_SAFE_MODE, data);
5593 
5594 	/* wait for RLC_SAFE_MODE */
5595 	for (i = 0; i < adev->usec_timeout; i++) {
5596 		if ((RREG32(mmRLC_GPM_STAT) &
5597 		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5598 		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5599 		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5600 		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5601 			break;
5602 		udelay(1);
5603 	}
5604 	for (i = 0; i < adev->usec_timeout; i++) {
5605 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5606 			break;
5607 		udelay(1);
5608 	}
5609 }
5610 
5611 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5612 {
5613 	uint32_t data;
5614 	unsigned i;
5615 
5616 	data = RREG32(mmRLC_CNTL);
5617 	data |= RLC_SAFE_MODE__CMD_MASK;
5618 	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5619 	WREG32(mmRLC_SAFE_MODE, data);
5620 
5621 	for (i = 0; i < adev->usec_timeout; i++) {
5622 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5623 			break;
5624 		udelay(1);
5625 	}
5626 }
5627 
/*
 * RLC callback table wiring the gfx v8 implementations into
 * struct amdgpu_rlc_funcs: enable query, safe-mode entry/exit, init,
 * clear-state buffer helpers, CP jump table count, and
 * resume/stop/reset/start.
 */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start
};
5641 
/*
 * Enable or disable GFX medium grain clock gating (MGCG) together with the
 * related RLC/CP memory light sleep and CGTS settings.
 *
 * @adev: device to program
 * @enable: true to enable; the enable path is only taken when cg_flags also
 *          has AMD_CG_SUPPORT_GFX_MGCG, otherwise the disable path runs
 *
 * The whole sequence is bracketed by amdgpu_gfx_rlc_enter_safe_mode() and
 * amdgpu_gfx_rlc_exit_safe_mode().  Registers are only written back when the
 * computed value differs from what was read.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* CP memory light sleep, when supported */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs leave the GRBM override bit untouched */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override is only dropped when both MGLS and CGTS_LS are supported */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5745 
/*
 * Enable or disable GFX coarse grain clock gating (CGCG) and, when supported,
 * coarse grain light sleep (CGLS).
 *
 * @adev: device to program
 * @enable: true to enable; the enable path is only taken when cg_flags also
 *          has AMD_CG_SUPPORT_GFX_CGCG, otherwise the disable path runs
 *
 * RLC_CGCG_CGLS_CTRL is sampled before entering RLC safe mode; the rest of the
 * sequence runs between amdgpu_gfx_rlc_enter_safe_mode() and
 * amdgpu_gfx_rlc_exit_safe_mode().  Registers are only written back when the
 * computed value differs from what was read.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* drop the CGCG override bit */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* and drop the CGLS override bit */
			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	/* final serdes idle wait, common to both paths */
	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5838 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5839 					    bool enable)
5840 {
5841 	if (enable) {
5842 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5843 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5844 		 */
5845 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5846 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5847 	} else {
5848 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5849 		 * ===  CGCG + CGLS ===
5850 		 */
5851 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5852 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5853 	}
5854 	return 0;
5855 }
5856 
5857 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5858 					  enum amd_clockgating_state state)
5859 {
5860 	uint32_t msg_id, pp_state = 0;
5861 	uint32_t pp_support_state = 0;
5862 
5863 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5864 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5865 			pp_support_state = PP_STATE_SUPPORT_LS;
5866 			pp_state = PP_STATE_LS;
5867 		}
5868 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5869 			pp_support_state |= PP_STATE_SUPPORT_CG;
5870 			pp_state |= PP_STATE_CG;
5871 		}
5872 		if (state == AMD_CG_STATE_UNGATE)
5873 			pp_state = 0;
5874 
5875 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5876 				PP_BLOCK_GFX_CG,
5877 				pp_support_state,
5878 				pp_state);
5879 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5880 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5881 	}
5882 
5883 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5884 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5885 			pp_support_state = PP_STATE_SUPPORT_LS;
5886 			pp_state = PP_STATE_LS;
5887 		}
5888 
5889 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5890 			pp_support_state |= PP_STATE_SUPPORT_CG;
5891 			pp_state |= PP_STATE_CG;
5892 		}
5893 
5894 		if (state == AMD_CG_STATE_UNGATE)
5895 			pp_state = 0;
5896 
5897 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5898 				PP_BLOCK_GFX_MG,
5899 				pp_support_state,
5900 				pp_state);
5901 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5902 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5903 	}
5904 
5905 	return 0;
5906 }
5907 
5908 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5909 					  enum amd_clockgating_state state)
5910 {
5911 
5912 	uint32_t msg_id, pp_state = 0;
5913 	uint32_t pp_support_state = 0;
5914 
5915 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5916 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5917 			pp_support_state = PP_STATE_SUPPORT_LS;
5918 			pp_state = PP_STATE_LS;
5919 		}
5920 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5921 			pp_support_state |= PP_STATE_SUPPORT_CG;
5922 			pp_state |= PP_STATE_CG;
5923 		}
5924 		if (state == AMD_CG_STATE_UNGATE)
5925 			pp_state = 0;
5926 
5927 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5928 				PP_BLOCK_GFX_CG,
5929 				pp_support_state,
5930 				pp_state);
5931 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5932 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5933 	}
5934 
5935 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5936 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5937 			pp_support_state = PP_STATE_SUPPORT_LS;
5938 			pp_state = PP_STATE_LS;
5939 		}
5940 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5941 			pp_support_state |= PP_STATE_SUPPORT_CG;
5942 			pp_state |= PP_STATE_CG;
5943 		}
5944 		if (state == AMD_CG_STATE_UNGATE)
5945 			pp_state = 0;
5946 
5947 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5948 				PP_BLOCK_GFX_3D,
5949 				pp_support_state,
5950 				pp_state);
5951 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5952 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5953 	}
5954 
5955 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5956 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5957 			pp_support_state = PP_STATE_SUPPORT_LS;
5958 			pp_state = PP_STATE_LS;
5959 		}
5960 
5961 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5962 			pp_support_state |= PP_STATE_SUPPORT_CG;
5963 			pp_state |= PP_STATE_CG;
5964 		}
5965 
5966 		if (state == AMD_CG_STATE_UNGATE)
5967 			pp_state = 0;
5968 
5969 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5970 				PP_BLOCK_GFX_MG,
5971 				pp_support_state,
5972 				pp_state);
5973 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5974 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5975 	}
5976 
5977 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5978 		pp_support_state = PP_STATE_SUPPORT_LS;
5979 
5980 		if (state == AMD_CG_STATE_UNGATE)
5981 			pp_state = 0;
5982 		else
5983 			pp_state = PP_STATE_LS;
5984 
5985 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5986 				PP_BLOCK_GFX_RLC,
5987 				pp_support_state,
5988 				pp_state);
5989 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5990 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5991 	}
5992 
5993 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5994 		pp_support_state = PP_STATE_SUPPORT_LS;
5995 
5996 		if (state == AMD_CG_STATE_UNGATE)
5997 			pp_state = 0;
5998 		else
5999 			pp_state = PP_STATE_LS;
6000 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6001 			PP_BLOCK_GFX_CP,
6002 			pp_support_state,
6003 			pp_state);
6004 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6005 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6006 	}
6007 
6008 	return 0;
6009 }
6010 
6011 static int gfx_v8_0_set_clockgating_state(void *handle,
6012 					  enum amd_clockgating_state state)
6013 {
6014 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6015 
6016 	if (amdgpu_sriov_vf(adev))
6017 		return 0;
6018 
6019 	switch (adev->asic_type) {
6020 	case CHIP_FIJI:
6021 	case CHIP_CARRIZO:
6022 	case CHIP_STONEY:
6023 		gfx_v8_0_update_gfx_clock_gating(adev,
6024 						 state == AMD_CG_STATE_GATE);
6025 		break;
6026 	case CHIP_TONGA:
6027 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6028 		break;
6029 	case CHIP_POLARIS10:
6030 	case CHIP_POLARIS11:
6031 	case CHIP_POLARIS12:
6032 	case CHIP_VEGAM:
6033 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6034 		break;
6035 	default:
6036 		break;
6037 	}
6038 	return 0;
6039 }
6040 
6041 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6042 {
6043 	return ring->adev->wb.wb[ring->rptr_offs];
6044 }
6045 
6046 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6047 {
6048 	struct amdgpu_device *adev = ring->adev;
6049 
6050 	if (ring->use_doorbell)
6051 		/* XXX check if swapping is necessary on BE */
6052 		return ring->adev->wb.wb[ring->wptr_offs];
6053 	else
6054 		return RREG32(mmCP_RB0_WPTR);
6055 }
6056 
6057 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6058 {
6059 	struct amdgpu_device *adev = ring->adev;
6060 
6061 	if (ring->use_doorbell) {
6062 		/* XXX check if swapping is necessary on BE */
6063 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6064 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6065 	} else {
6066 		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6067 		(void)RREG32(mmCP_RB0_WPTR);
6068 	}
6069 }
6070 
6071 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6072 {
6073 	u32 ref_and_mask, reg_mem_engine;
6074 
6075 	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6076 	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6077 		switch (ring->me) {
6078 		case 1:
6079 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6080 			break;
6081 		case 2:
6082 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6083 			break;
6084 		default:
6085 			return;
6086 		}
6087 		reg_mem_engine = 0;
6088 	} else {
6089 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6090 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6091 	}
6092 
6093 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6094 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6095 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6096 				 reg_mem_engine));
6097 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6098 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6099 	amdgpu_ring_write(ring, ref_and_mask);
6100 	amdgpu_ring_write(ring, ref_and_mask);
6101 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6102 }
6103 
6104 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6105 {
6106 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6107 	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6108 		EVENT_INDEX(4));
6109 
6110 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6111 	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6112 		EVENT_INDEX(0));
6113 }
6114 
6115 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6116 					struct amdgpu_job *job,
6117 					struct amdgpu_ib *ib,
6118 					uint32_t flags)
6119 {
6120 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6121 	u32 header, control = 0;
6122 
6123 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6124 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6125 	else
6126 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6127 
6128 	control |= ib->length_dw | (vmid << 24);
6129 
6130 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6131 		control |= INDIRECT_BUFFER_PRE_ENB(1);
6132 
6133 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6134 			gfx_v8_0_ring_emit_de_meta(ring);
6135 	}
6136 
6137 	amdgpu_ring_write(ring, header);
6138 	amdgpu_ring_write(ring,
6139 #ifdef __BIG_ENDIAN
6140 			  (2 << 0) |
6141 #endif
6142 			  (ib->gpu_addr & 0xFFFFFFFC));
6143 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6144 	amdgpu_ring_write(ring, control);
6145 }
6146 
6147 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6148 					  struct amdgpu_job *job,
6149 					  struct amdgpu_ib *ib,
6150 					  uint32_t flags)
6151 {
6152 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6153 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6154 
6155 	/* Currently, there is a high possibility to get wave ID mismatch
6156 	 * between ME and GDS, leading to a hw deadlock, because ME generates
6157 	 * different wave IDs than the GDS expects. This situation happens
6158 	 * randomly when at least 5 compute pipes use GDS ordered append.
6159 	 * The wave IDs generated by ME are also wrong after suspend/resume.
6160 	 * Those are probably bugs somewhere else in the kernel driver.
6161 	 *
6162 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6163 	 * GDS to 0 for this ring (me/pipe).
6164 	 */
6165 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6166 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6167 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6168 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6169 	}
6170 
6171 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6172 	amdgpu_ring_write(ring,
6173 #ifdef __BIG_ENDIAN
6174 				(2 << 0) |
6175 #endif
6176 				(ib->gpu_addr & 0xFFFFFFFC));
6177 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6178 	amdgpu_ring_write(ring, control);
6179 }
6180 
6181 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6182 					 u64 seq, unsigned flags)
6183 {
6184 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6185 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6186 
6187 	/* EVENT_WRITE_EOP - flush caches, send int */
6188 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6189 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6190 				 EOP_TC_ACTION_EN |
6191 				 EOP_TC_WB_ACTION_EN |
6192 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6193 				 EVENT_INDEX(5)));
6194 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6195 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6196 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6197 	amdgpu_ring_write(ring, lower_32_bits(seq));
6198 	amdgpu_ring_write(ring, upper_32_bits(seq));
6199 
6200 }
6201 
6202 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6203 {
6204 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6205 	uint32_t seq = ring->fence_drv.sync_seq;
6206 	uint64_t addr = ring->fence_drv.gpu_addr;
6207 
6208 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6209 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6210 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6211 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6212 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6213 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6214 	amdgpu_ring_write(ring, seq);
6215 	amdgpu_ring_write(ring, 0xffffffff);
6216 	amdgpu_ring_write(ring, 4); /* poll interval */
6217 }
6218 
6219 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6220 					unsigned vmid, uint64_t pd_addr)
6221 {
6222 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6223 
6224 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6225 
6226 	/* wait for the invalidate to complete */
6227 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6228 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6229 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6230 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6231 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6232 	amdgpu_ring_write(ring, 0);
6233 	amdgpu_ring_write(ring, 0); /* ref */
6234 	amdgpu_ring_write(ring, 0); /* mask */
6235 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6236 
6237 	/* compute doesn't have PFP */
6238 	if (usepfp) {
6239 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6240 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6241 		amdgpu_ring_write(ring, 0x0);
6242 	}
6243 }
6244 
6245 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6246 {
6247 	return ring->adev->wb.wb[ring->wptr_offs];
6248 }
6249 
6250 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6251 {
6252 	struct amdgpu_device *adev = ring->adev;
6253 
6254 	/* XXX check if swapping is necessary on BE */
6255 	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6256 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6257 }
6258 
6259 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6260 					   bool acquire)
6261 {
6262 	struct amdgpu_device *adev = ring->adev;
6263 	int pipe_num, tmp, reg;
6264 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6265 
6266 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6267 
6268 	/* first me only has 2 entries, GFX and HP3D */
6269 	if (ring->me > 0)
6270 		pipe_num -= 2;
6271 
6272 	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6273 	tmp = RREG32(reg);
6274 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6275 	WREG32(reg, tmp);
6276 }
6277 
6278 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6279 					    struct amdgpu_ring *ring,
6280 					    bool acquire)
6281 {
6282 	int i, pipe;
6283 	bool reserve;
6284 	struct amdgpu_ring *iring;
6285 
6286 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
6287 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
6288 	if (acquire)
6289 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6290 	else
6291 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6292 
6293 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6294 		/* Clear all reservations - everyone reacquires all resources */
6295 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6296 			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6297 						       true);
6298 
6299 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6300 			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6301 						       true);
6302 	} else {
6303 		/* Lower all pipes without a current reservation */
6304 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6305 			iring = &adev->gfx.gfx_ring[i];
6306 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6307 							   iring->me,
6308 							   iring->pipe,
6309 							   0);
6310 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6311 			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6312 		}
6313 
6314 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6315 			iring = &adev->gfx.compute_ring[i];
6316 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6317 							   iring->me,
6318 							   iring->pipe,
6319 							   0);
6320 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6321 			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6322 		}
6323 	}
6324 
6325 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6326 }
6327 
6328 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6329 				      struct amdgpu_ring *ring,
6330 				      bool acquire)
6331 {
6332 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6333 	uint32_t queue_priority = acquire ? 0xf : 0x0;
6334 
6335 	mutex_lock(&adev->srbm_mutex);
6336 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6337 
6338 	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6339 	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6340 
6341 	vi_srbm_select(adev, 0, 0, 0, 0);
6342 	mutex_unlock(&adev->srbm_mutex);
6343 }
6344 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6345 					       enum drm_sched_priority priority)
6346 {
6347 	struct amdgpu_device *adev = ring->adev;
6348 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6349 
6350 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6351 		return;
6352 
6353 	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6354 	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6355 }
6356 
6357 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6358 					     u64 addr, u64 seq,
6359 					     unsigned flags)
6360 {
6361 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6362 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6363 
6364 	/* RELEASE_MEM - flush caches, send int */
6365 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6366 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6367 				 EOP_TC_ACTION_EN |
6368 				 EOP_TC_WB_ACTION_EN |
6369 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6370 				 EVENT_INDEX(5)));
6371 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6372 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6373 	amdgpu_ring_write(ring, upper_32_bits(addr));
6374 	amdgpu_ring_write(ring, lower_32_bits(seq));
6375 	amdgpu_ring_write(ring, upper_32_bits(seq));
6376 }
6377 
6378 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6379 					 u64 seq, unsigned int flags)
6380 {
6381 	/* we only allocate 32bit for each seq wb address */
6382 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6383 
6384 	/* write fence seq to the "addr" */
6385 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6386 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6387 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6388 	amdgpu_ring_write(ring, lower_32_bits(addr));
6389 	amdgpu_ring_write(ring, upper_32_bits(addr));
6390 	amdgpu_ring_write(ring, lower_32_bits(seq));
6391 
6392 	if (flags & AMDGPU_FENCE_FLAG_INT) {
6393 		/* set register to trigger INT */
6394 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6395 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6396 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6397 		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6398 		amdgpu_ring_write(ring, 0);
6399 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6400 	}
6401 }
6402 
6403 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6404 {
6405 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6406 	amdgpu_ring_write(ring, 0);
6407 }
6408 
6409 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6410 {
6411 	uint32_t dw2 = 0;
6412 
6413 	if (amdgpu_sriov_vf(ring->adev))
6414 		gfx_v8_0_ring_emit_ce_meta(ring);
6415 
6416 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6417 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6418 		gfx_v8_0_ring_emit_vgt_flush(ring);
6419 		/* set load_global_config & load_global_uconfig */
6420 		dw2 |= 0x8001;
6421 		/* set load_cs_sh_regs */
6422 		dw2 |= 0x01000000;
6423 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6424 		dw2 |= 0x10002;
6425 
6426 		/* set load_ce_ram if preamble presented */
6427 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6428 			dw2 |= 0x10000000;
6429 	} else {
6430 		/* still load_ce_ram if this is the first time preamble presented
6431 		 * although there is no context switch happens.
6432 		 */
6433 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6434 			dw2 |= 0x10000000;
6435 	}
6436 
6437 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6438 	amdgpu_ring_write(ring, dw2);
6439 	amdgpu_ring_write(ring, 0);
6440 }
6441 
6442 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6443 {
6444 	unsigned ret;
6445 
6446 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6447 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6448 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6449 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6450 	ret = ring->wptr & ring->buf_mask;
6451 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6452 	return ret;
6453 }
6454 
6455 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6456 {
6457 	unsigned cur;
6458 
6459 	BUG_ON(offset > ring->buf_mask);
6460 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6461 
6462 	cur = (ring->wptr & ring->buf_mask) - 1;
6463 	if (likely(cur > offset))
6464 		ring->ring[offset] = cur - offset;
6465 	else
6466 		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6467 }
6468 
6469 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6470 {
6471 	struct amdgpu_device *adev = ring->adev;
6472 
6473 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6474 	amdgpu_ring_write(ring, 0 |	/* src: register*/
6475 				(5 << 8) |	/* dst: memory */
6476 				(1 << 20));	/* write confirm */
6477 	amdgpu_ring_write(ring, reg);
6478 	amdgpu_ring_write(ring, 0);
6479 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6480 				adev->virt.reg_val_offs * 4));
6481 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6482 				adev->virt.reg_val_offs * 4));
6483 }
6484 
6485 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6486 				  uint32_t val)
6487 {
6488 	uint32_t cmd;
6489 
6490 	switch (ring->funcs->type) {
6491 	case AMDGPU_RING_TYPE_GFX:
6492 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6493 		break;
6494 	case AMDGPU_RING_TYPE_KIQ:
6495 		cmd = 1 << 16; /* no inc addr */
6496 		break;
6497 	default:
6498 		cmd = WR_CONFIRM;
6499 		break;
6500 	}
6501 
6502 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6503 	amdgpu_ring_write(ring, cmd);
6504 	amdgpu_ring_write(ring, reg);
6505 	amdgpu_ring_write(ring, 0);
6506 	amdgpu_ring_write(ring, val);
6507 }
6508 
6509 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6510 {
6511 	struct amdgpu_device *adev = ring->adev;
6512 	uint32_t value = 0;
6513 
6514 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6515 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6516 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6517 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6518 	WREG32(mmSQ_CMD, value);
6519 }
6520 
6521 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6522 						 enum amdgpu_interrupt_state state)
6523 {
6524 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6525 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6526 }
6527 
6528 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6529 						     int me, int pipe,
6530 						     enum amdgpu_interrupt_state state)
6531 {
6532 	u32 mec_int_cntl, mec_int_cntl_reg;
6533 
6534 	/*
6535 	 * amdgpu controls only the first MEC. That's why this function only
6536 	 * handles the setting of interrupts for this specific MEC. All other
6537 	 * pipes' interrupts are set by amdkfd.
6538 	 */
6539 
6540 	if (me == 1) {
6541 		switch (pipe) {
6542 		case 0:
6543 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6544 			break;
6545 		case 1:
6546 			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6547 			break;
6548 		case 2:
6549 			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6550 			break;
6551 		case 3:
6552 			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6553 			break;
6554 		default:
6555 			DRM_DEBUG("invalid pipe %d\n", pipe);
6556 			return;
6557 		}
6558 	} else {
6559 		DRM_DEBUG("invalid me %d\n", me);
6560 		return;
6561 	}
6562 
6563 	switch (state) {
6564 	case AMDGPU_IRQ_STATE_DISABLE:
6565 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6566 		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6567 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6568 		break;
6569 	case AMDGPU_IRQ_STATE_ENABLE:
6570 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6571 		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6572 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6573 		break;
6574 	default:
6575 		break;
6576 	}
6577 }
6578 
6579 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6580 					     struct amdgpu_irq_src *source,
6581 					     unsigned type,
6582 					     enum amdgpu_interrupt_state state)
6583 {
6584 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6585 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6586 
6587 	return 0;
6588 }
6589 
6590 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6591 					      struct amdgpu_irq_src *source,
6592 					      unsigned type,
6593 					      enum amdgpu_interrupt_state state)
6594 {
6595 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6596 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6597 
6598 	return 0;
6599 }
6600 
6601 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6602 					    struct amdgpu_irq_src *src,
6603 					    unsigned type,
6604 					    enum amdgpu_interrupt_state state)
6605 {
6606 	switch (type) {
6607 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6608 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6609 		break;
6610 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6611 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6612 		break;
6613 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6614 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6615 		break;
6616 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6617 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6618 		break;
6619 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6620 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6621 		break;
6622 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6623 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6624 		break;
6625 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6626 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6627 		break;
6628 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6629 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6630 		break;
6631 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6632 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6633 		break;
6634 	default:
6635 		break;
6636 	}
6637 	return 0;
6638 }
6639 
6640 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6641 					 struct amdgpu_irq_src *source,
6642 					 unsigned int type,
6643 					 enum amdgpu_interrupt_state state)
6644 {
6645 	int enable_flag;
6646 
6647 	switch (state) {
6648 	case AMDGPU_IRQ_STATE_DISABLE:
6649 		enable_flag = 0;
6650 		break;
6651 
6652 	case AMDGPU_IRQ_STATE_ENABLE:
6653 		enable_flag = 1;
6654 		break;
6655 
6656 	default:
6657 		return -EINVAL;
6658 	}
6659 
6660 	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6661 	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6662 	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6663 	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6664 	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6665 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6666 		     enable_flag);
6667 	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6668 		     enable_flag);
6669 	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6670 		     enable_flag);
6671 	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6672 		     enable_flag);
6673 	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6674 		     enable_flag);
6675 	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6676 		     enable_flag);
6677 	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6678 		     enable_flag);
6679 	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6680 		     enable_flag);
6681 
6682 	return 0;
6683 }
6684 
6685 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6686 				     struct amdgpu_irq_src *source,
6687 				     unsigned int type,
6688 				     enum amdgpu_interrupt_state state)
6689 {
6690 	int enable_flag;
6691 
6692 	switch (state) {
6693 	case AMDGPU_IRQ_STATE_DISABLE:
6694 		enable_flag = 1;
6695 		break;
6696 
6697 	case AMDGPU_IRQ_STATE_ENABLE:
6698 		enable_flag = 0;
6699 		break;
6700 
6701 	default:
6702 		return -EINVAL;
6703 	}
6704 
6705 	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6706 		     enable_flag);
6707 
6708 	return 0;
6709 }
6710 
6711 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6712 			    struct amdgpu_irq_src *source,
6713 			    struct amdgpu_iv_entry *entry)
6714 {
6715 	int i;
6716 	u8 me_id, pipe_id, queue_id;
6717 	struct amdgpu_ring *ring;
6718 
6719 	DRM_DEBUG("IH: CP EOP\n");
6720 	me_id = (entry->ring_id & 0x0c) >> 2;
6721 	pipe_id = (entry->ring_id & 0x03) >> 0;
6722 	queue_id = (entry->ring_id & 0x70) >> 4;
6723 
6724 	switch (me_id) {
6725 	case 0:
6726 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6727 		break;
6728 	case 1:
6729 	case 2:
6730 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6731 			ring = &adev->gfx.compute_ring[i];
6732 			/* Per-queue interrupt is supported for MEC starting from VI.
6733 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6734 			  */
6735 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6736 				amdgpu_fence_process(ring);
6737 		}
6738 		break;
6739 	}
6740 	return 0;
6741 }
6742 
6743 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6744 			   struct amdgpu_iv_entry *entry)
6745 {
6746 	u8 me_id, pipe_id, queue_id;
6747 	struct amdgpu_ring *ring;
6748 	int i;
6749 
6750 	me_id = (entry->ring_id & 0x0c) >> 2;
6751 	pipe_id = (entry->ring_id & 0x03) >> 0;
6752 	queue_id = (entry->ring_id & 0x70) >> 4;
6753 
6754 	switch (me_id) {
6755 	case 0:
6756 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6757 		break;
6758 	case 1:
6759 	case 2:
6760 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6761 			ring = &adev->gfx.compute_ring[i];
6762 			if (ring->me == me_id && ring->pipe == pipe_id &&
6763 			    ring->queue == queue_id)
6764 				drm_sched_fault(&ring->sched);
6765 		}
6766 		break;
6767 	}
6768 }
6769 
/*
 * Privileged register fault handler: log the error, then flag a fault
 * on the ring identified by @entry. @source is unused. Always returns 0.
 */
static int
gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
		      struct amdgpu_irq_src *source,
		      struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	/* hand the offending ring over to its scheduler */
	gfx_v8_0_fault(adev, entry);

	return 0;
}
6778 
/*
 * Privileged instruction fault handler: log the error, then flag a fault
 * on the ring identified by @entry. @source is unused. Always returns 0.
 */
static int
gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
		       struct amdgpu_irq_src *source,
		       struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	/* hand the offending ring over to its scheduler */
	gfx_v8_0_fault(adev, entry);

	return 0;
}
6787 
/*
 * CP ECC error interrupt handler: only logs the event.
 * @adev, @source and @entry are unused. Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* newline-terminated like the other handlers' messages */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6795 
6796 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6797 {
6798 	u32 enc, se_id, sh_id, cu_id;
6799 	char type[20];
6800 	int sq_edc_source = -1;
6801 
6802 	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6803 	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6804 
6805 	switch (enc) {
6806 		case 0:
6807 			DRM_INFO("SQ general purpose intr detected:"
6808 					"se_id %d, immed_overflow %d, host_reg_overflow %d,"
6809 					"host_cmd_overflow %d, cmd_timestamp %d,"
6810 					"reg_timestamp %d, thread_trace_buff_full %d,"
6811 					"wlt %d, thread_trace %d.\n",
6812 					se_id,
6813 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6814 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6815 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6816 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6817 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6818 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6819 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6820 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6821 					);
6822 			break;
6823 		case 1:
6824 		case 2:
6825 
6826 			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6827 			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6828 
6829 			/*
6830 			 * This function can be called either directly from ISR
6831 			 * or from BH in which case we can access SQ_EDC_INFO
6832 			 * instance
6833 			 */
6834 			if (in_task()) {
6835 				mutex_lock(&adev->grbm_idx_mutex);
6836 				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6837 
6838 				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6839 
6840 				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6841 				mutex_unlock(&adev->grbm_idx_mutex);
6842 			}
6843 
6844 			if (enc == 1)
6845 				sprintf(type, "instruction intr");
6846 			else
6847 				sprintf(type, "EDC/ECC error");
6848 
6849 			DRM_INFO(
6850 				"SQ %s detected: "
6851 					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6852 					"trap %s, sq_ed_info.source %s.\n",
6853 					type, se_id, sh_id, cu_id,
6854 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6855 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6856 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6857 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6858 					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6859 				);
6860 			break;
6861 		default:
6862 			DRM_ERROR("SQ invalid encoding type\n.");
6863 	}
6864 }
6865 
6866 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6867 {
6868 
6869 	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6870 	struct sq_work *sq_work = container_of(work, struct sq_work, work);
6871 
6872 	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6873 }
6874 
6875 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6876 			   struct amdgpu_irq_src *source,
6877 			   struct amdgpu_iv_entry *entry)
6878 {
6879 	unsigned ih_data = entry->src_data[0];
6880 
6881 	/*
6882 	 * Try to submit work so SQ_EDC_INFO can be accessed from
6883 	 * BH. If previous work submission hasn't finished yet
6884 	 * just print whatever info is possible directly from the ISR.
6885 	 */
6886 	if (work_pending(&adev->gfx.sq_work.work)) {
6887 		gfx_v8_0_parse_sq_irq(adev, ih_data);
6888 	} else {
6889 		adev->gfx.sq_work.ih_data = ih_data;
6890 		schedule_work(&adev->gfx.sq_work.work);
6891 	}
6892 
6893 	return 0;
6894 }
6895 
6896 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6897 	.name = "gfx_v8_0",
6898 	.early_init = gfx_v8_0_early_init,
6899 	.late_init = gfx_v8_0_late_init,
6900 	.sw_init = gfx_v8_0_sw_init,
6901 	.sw_fini = gfx_v8_0_sw_fini,
6902 	.hw_init = gfx_v8_0_hw_init,
6903 	.hw_fini = gfx_v8_0_hw_fini,
6904 	.suspend = gfx_v8_0_suspend,
6905 	.resume = gfx_v8_0_resume,
6906 	.is_idle = gfx_v8_0_is_idle,
6907 	.wait_for_idle = gfx_v8_0_wait_for_idle,
6908 	.check_soft_reset = gfx_v8_0_check_soft_reset,
6909 	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6910 	.soft_reset = gfx_v8_0_soft_reset,
6911 	.post_soft_reset = gfx_v8_0_post_soft_reset,
6912 	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6913 	.set_powergating_state = gfx_v8_0_set_powergating_state,
6914 	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6915 };
6916 
6917 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6918 	.type = AMDGPU_RING_TYPE_GFX,
6919 	.align_mask = 0xff,
6920 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6921 	.support_64bit_ptrs = false,
6922 	.get_rptr = gfx_v8_0_ring_get_rptr,
6923 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6924 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6925 	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
6926 		5 +  /* COND_EXEC */
6927 		7 +  /* PIPELINE_SYNC */
6928 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6929 		8 +  /* FENCE for VM_FLUSH */
6930 		20 + /* GDS switch */
6931 		4 + /* double SWITCH_BUFFER,
6932 		       the first COND_EXEC jump to the place just
6933 			   prior to this double SWITCH_BUFFER  */
6934 		5 + /* COND_EXEC */
6935 		7 +	 /*	HDP_flush */
6936 		4 +	 /*	VGT_flush */
6937 		14 + /*	CE_META */
6938 		31 + /*	DE_META */
6939 		3 + /* CNTX_CTRL */
6940 		5 + /* HDP_INVL */
6941 		8 + 8 + /* FENCE x2 */
6942 		2, /* SWITCH_BUFFER */
6943 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6944 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6945 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6946 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6947 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6948 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6949 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6950 	.test_ring = gfx_v8_0_ring_test_ring,
6951 	.test_ib = gfx_v8_0_ring_test_ib,
6952 	.insert_nop = amdgpu_ring_insert_nop,
6953 	.pad_ib = amdgpu_ring_generic_pad_ib,
6954 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6955 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6956 	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6957 	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6958 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6959 	.soft_recovery = gfx_v8_0_ring_soft_recovery,
6960 };
6961 
6962 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6963 	.type = AMDGPU_RING_TYPE_COMPUTE,
6964 	.align_mask = 0xff,
6965 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6966 	.support_64bit_ptrs = false,
6967 	.get_rptr = gfx_v8_0_ring_get_rptr,
6968 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6969 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6970 	.emit_frame_size =
6971 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6972 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6973 		5 + /* hdp_invalidate */
6974 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6975 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6976 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6977 	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6978 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6979 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6980 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6981 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6982 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6983 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6984 	.test_ring = gfx_v8_0_ring_test_ring,
6985 	.test_ib = gfx_v8_0_ring_test_ib,
6986 	.insert_nop = amdgpu_ring_insert_nop,
6987 	.pad_ib = amdgpu_ring_generic_pad_ib,
6988 	.set_priority = gfx_v8_0_ring_set_priority_compute,
6989 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6990 };
6991 
6992 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6993 	.type = AMDGPU_RING_TYPE_KIQ,
6994 	.align_mask = 0xff,
6995 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6996 	.support_64bit_ptrs = false,
6997 	.get_rptr = gfx_v8_0_ring_get_rptr,
6998 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6999 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
7000 	.emit_frame_size =
7001 		20 + /* gfx_v8_0_ring_emit_gds_switch */
7002 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
7003 		5 + /* hdp_invalidate */
7004 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7005 		17 + /* gfx_v8_0_ring_emit_vm_flush */
7006 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7007 	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
7008 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7009 	.test_ring = gfx_v8_0_ring_test_ring,
7010 	.insert_nop = amdgpu_ring_insert_nop,
7011 	.pad_ib = amdgpu_ring_generic_pad_ib,
7012 	.emit_rreg = gfx_v8_0_ring_emit_rreg,
7013 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7014 };
7015 
7016 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7017 {
7018 	int i;
7019 
7020 	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7021 
7022 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7023 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7024 
7025 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7026 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7027 }
7028 
7029 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7030 	.set = gfx_v8_0_set_eop_interrupt_state,
7031 	.process = gfx_v8_0_eop_irq,
7032 };
7033 
7034 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7035 	.set = gfx_v8_0_set_priv_reg_fault_state,
7036 	.process = gfx_v8_0_priv_reg_irq,
7037 };
7038 
7039 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7040 	.set = gfx_v8_0_set_priv_inst_fault_state,
7041 	.process = gfx_v8_0_priv_inst_irq,
7042 };
7043 
7044 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7045 	.set = gfx_v8_0_set_cp_ecc_int_state,
7046 	.process = gfx_v8_0_cp_ecc_error_irq,
7047 };
7048 
7049 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7050 	.set = gfx_v8_0_set_sq_int_state,
7051 	.process = gfx_v8_0_sq_irq,
7052 };
7053 
7054 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7055 {
7056 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7057 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7058 
7059 	adev->gfx.priv_reg_irq.num_types = 1;
7060 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7061 
7062 	adev->gfx.priv_inst_irq.num_types = 1;
7063 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7064 
7065 	adev->gfx.cp_ecc_error_irq.num_types = 1;
7066 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7067 
7068 	adev->gfx.sq_irq.num_types = 1;
7069 	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7070 }
7071 
7072 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7073 {
7074 	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7075 }
7076 
7077 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7078 {
7079 	/* init asci gds info */
7080 	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7081 	adev->gds.gws_size = 64;
7082 	adev->gds.oa_size = 16;
7083 	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7084 }
7085 
7086 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7087 						 u32 bitmap)
7088 {
7089 	u32 data;
7090 
7091 	if (!bitmap)
7092 		return;
7093 
7094 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7095 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7096 
7097 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7098 }
7099 
7100 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7101 {
7102 	u32 data, mask;
7103 
7104 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7105 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7106 
7107 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7108 
7109 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7110 }
7111 
7112 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7113 {
7114 	int i, j, k, counter, active_cu_number = 0;
7115 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7116 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7117 	unsigned disable_masks[4 * 2];
7118 	u32 ao_cu_num;
7119 
7120 	memset(cu_info, 0, sizeof(*cu_info));
7121 
7122 	if (adev->flags & AMD_IS_APU)
7123 		ao_cu_num = 2;
7124 	else
7125 		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7126 
7127 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7128 
7129 	mutex_lock(&adev->grbm_idx_mutex);
7130 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7131 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7132 			mask = 1;
7133 			ao_bitmap = 0;
7134 			counter = 0;
7135 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7136 			if (i < 4 && j < 2)
7137 				gfx_v8_0_set_user_cu_inactive_bitmap(
7138 					adev, disable_masks[i * 2 + j]);
7139 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7140 			cu_info->bitmap[i][j] = bitmap;
7141 
7142 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7143 				if (bitmap & mask) {
7144 					if (counter < ao_cu_num)
7145 						ao_bitmap |= mask;
7146 					counter ++;
7147 				}
7148 				mask <<= 1;
7149 			}
7150 			active_cu_number += counter;
7151 			if (i < 2 && j < 2)
7152 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7153 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7154 		}
7155 	}
7156 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7157 	mutex_unlock(&adev->grbm_idx_mutex);
7158 
7159 	cu_info->number = active_cu_number;
7160 	cu_info->ao_cu_mask = ao_cu_mask;
7161 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7162 	cu_info->max_waves_per_simd = 10;
7163 	cu_info->max_scratch_slots_per_cu = 32;
7164 	cu_info->wave_front_size = 64;
7165 	cu_info->lds_size = 64;
7166 }
7167 
7168 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7169 {
7170 	.type = AMD_IP_BLOCK_TYPE_GFX,
7171 	.major = 8,
7172 	.minor = 0,
7173 	.rev = 0,
7174 	.funcs = &gfx_v8_0_ip_funcs,
7175 };
7176 
7177 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7178 {
7179 	.type = AMD_IP_BLOCK_TYPE_GFX,
7180 	.major = 8,
7181 	.minor = 1,
7182 	.rev = 0,
7183 	.funcs = &gfx_v8_0_ip_funcs,
7184 };
7185 
7186 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7187 {
7188 	uint64_t ce_payload_addr;
7189 	int cnt_ce;
7190 	union {
7191 		struct vi_ce_ib_state regular;
7192 		struct vi_ce_ib_state_chained_ib chained;
7193 	} ce_payload = {};
7194 
7195 	if (ring->adev->virt.chained_ib_support) {
7196 		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7197 			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7198 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7199 	} else {
7200 		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7201 			offsetof(struct vi_gfx_meta_data, ce_payload);
7202 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7203 	}
7204 
7205 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7206 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7207 				WRITE_DATA_DST_SEL(8) |
7208 				WR_CONFIRM) |
7209 				WRITE_DATA_CACHE_POLICY(0));
7210 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7211 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7212 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7213 }
7214 
7215 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7216 {
7217 	uint64_t de_payload_addr, gds_addr, csa_addr;
7218 	int cnt_de;
7219 	union {
7220 		struct vi_de_ib_state regular;
7221 		struct vi_de_ib_state_chained_ib chained;
7222 	} de_payload = {};
7223 
7224 	csa_addr = amdgpu_csa_vaddr(ring->adev);
7225 	gds_addr = csa_addr + 4096;
7226 	if (ring->adev->virt.chained_ib_support) {
7227 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7228 		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7229 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7230 		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7231 	} else {
7232 		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7233 		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7234 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7235 		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7236 	}
7237 
7238 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7239 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7240 				WRITE_DATA_DST_SEL(8) |
7241 				WR_CONFIRM) |
7242 				WRITE_DATA_CACHE_POLICY(0));
7243 	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7244 	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7245 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7246 }
7247