xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 04eb94d526423ff082efce61f4f26b0369d0bfdd)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "vi.h"
33 #include "vi_structs.h"
34 #include "vid.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
39 
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
42 
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
45 
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
51 
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
54 
55 #include "smu/smu_7_1_3_d.h"
56 
57 #include "ivsrcid/ivsrcid_vislands30.h"
58 
/*
 * Driver-local sizing constants for GFX8.
 * GFX8_NUM_GFX_RINGS: number of graphics rings (1).
 * GFX8_MEC_HPD_SIZE:  4096.
 * NOTE(review): presumably a MEC buffer size in bytes -- confirm at the
 * allocation site, which is not in this part of the file.
 */
59 #define GFX8_NUM_GFX_RINGS     1
60 #define GFX8_MEC_HPD_SIZE 4096
61 
/*
 * Per-ASIC "golden" GB_ADDR_CONFIG values.
 * The same literals also appear as the mmGB_ADDR_CONFIG rows of the
 * *_golden_common_all tables below (0x22010001 in the iceland and cz tables,
 * 0x22011002 in the polaris11 table, 0x22011003 in the tonga table).
 * Topaz and Carrizo share one value.
 */
62 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
63 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
65 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
66 
/*
 * Field-placement helpers: each macro shifts its argument left by the
 * corresponding <REGISTER>__<FIELD>__SHIFT constant so the result can be
 * OR-ed into a register value.  The first five target GB_TILE_MODE0 fields,
 * the last four target GB_MACROTILE_MODE0 fields.
 * No masking is applied, so the caller must pass a value that fits the field.
 */
67 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
68 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
69 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
70 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
71 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
72 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
73 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
74 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
75 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
76 
/*
 * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, one per override bit:
 * bit 0 CPF, bit 1 RLC, bit 2 MGCG, bit 3 CGCG, bit 4 CGLS, bit 5 GRBM.
 * The L suffix makes each constant a (signed) long.
 */
77 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
78 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
79 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
80 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
82 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
83 
84 /*
 * BPM SERDES command codes: 1 selects "set", 0 selects the opposite action.
 * NOTE(review): "CLE" presumably abbreviates "clear" -- confirm at the use
 * site, which is not in this part of the file.
 */
85 #define SET_BPM_SERDES_CMD    1
86 #define CLE_BPM_SERDES_CMD    0
87 
88 /*
 * BPM register addresses.
 * Anonymous enum with implicit values starting at 0 and increasing by one;
 * BPM_REG_FGCG_MAX is therefore 5, the number of register entries before it.
 */
89 enum {
90 	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
91 	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
92 	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
93 	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
94 	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
95 	BPM_REG_FGCG_MAX            /* one past the last register entry (5) */
96 };
97 
/*
 * Length constant (14) for the RLC "format direct" register list.
 * NOTE(review): the unit (entries vs. dwords) is not visible in this part of
 * the file -- confirm where the constant is consumed.
 */
98 #define RLC_FormatDirectRegListLength        14
99 
/*
 * Firmware image names declared by this module, grouped per ASIC.
 * Image kinds by suffix: ce, pfp, me, mec, mec2, rlc.
 * NOTE(review): MODULE_FIRMWARE() is expected to only record the name in the
 * module metadata; the actual loading happens elsewhere -- confirm against
 * <linux/module.h>.
 */
/* Carrizo */
100 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
106 
/* Stoney (no mec2 image listed) */
107 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
112 
/* Tonga */
113 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
119 
/* Topaz (no mec2 image listed) */
120 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
125 
/* Fiji */
126 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
132 
/* Polaris10: every image except rlc also has a "_2" variant */
133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
144 
/* Polaris11: every image except rlc also has a "_2" variant */
145 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
156 
/* Polaris12: every image except rlc also has a "_2" variant */
157 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
168 
/* VegaM */
169 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
175 
/*
 * GDS register offsets per VMID: 16 rows, row i holding the registers for
 * VMID i (0..15) in the order BASE, SIZE, GWS, OA.
 * The member names of struct amdgpu_gds_reg_offset are declared elsewhere;
 * the initializers are positional, so this order must match the struct.
 */
176 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
177 {
178 	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
179 	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
180 	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
181 	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
182 	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
183 	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
184 	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
185 	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
186 	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
187 	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
188 	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
189 	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
190 	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
191 	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
192 	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
193 	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
194 };
195 
/*
 * Tonga golden register settings.
 * Flat u32 table written three values per row: an mm-prefixed register
 * mnemonic followed by two 32-bit constants.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 * NOTE(review): in the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows the third
 * column has bits set outside the second column; check how the consumer
 * treats such bits.
 */
196 static const u32 golden_settings_tonga_a11[] =
197 {
198 	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
199 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
200 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
201 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
202 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
203 	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
204 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
205 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
206 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
207 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
208 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
209 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
210 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
211 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
212 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
213 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
214 };
215 
/*
 * Tonga common golden settings.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants; every row here has 0xffffffff in the second column.
 * The mmGB_ADDR_CONFIG value equals TONGA_GB_ADDR_CONFIG_GOLDEN (0x22011003).
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 */
216 static const u32 tonga_golden_common_all[] =
217 {
218 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
219 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
220 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
221 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
222 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
223 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
224 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
225 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
226 };
227 
/*
 * Tonga clock-gating (MGCG/CGCG) init sequence.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants.  Row order matters: mmGRBM_GFX_INDEX is written with 0xe0000000
 * before the CGTT_* group and again before the per-CU CGTS_* group.
 * The per-CU group covers CU0..CU7 with five registers each; CU0 and CU4 use
 * the TA_SQC register name, the other CUs use TA.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 */
228 static const u32 tonga_mgcg_cgcg_init[] =
229 {
230 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
231 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
232 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
233 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
234 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
235 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
236 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
237 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
238 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
239 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
240 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
241 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
242 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
243 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
244 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
245 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
246 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
247 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
248 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
249 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
250 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
251 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
252 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
253 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
254 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
255 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
256 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
257 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
258 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
260 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
264 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
269 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
274 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
279 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
284 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
289 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291 	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292 	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293 	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
294 	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295 	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296 	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
297 	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
298 	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
299 	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
300 	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
301 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
302 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
303 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
304 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
305 };
306 
/*
 * VegaM golden register settings.
 * Flat u32 table, three values per row: an mm-prefixed register mnemonic
 * followed by two 32-bit constants.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 * NOTE(review): in the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows the third
 * column has bits set outside the second column; check how the consumer
 * treats such bits.
 */
307 static const u32 golden_settings_vegam_a11[] =
308 {
309 	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
310 	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
311 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
312 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
313 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
314 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
315 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
316 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
317 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
318 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
319 	mmSQ_CONFIG, 0x07f80000, 0x01180000,
320 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
321 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
322 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
323 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
324 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
325 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
326 };
327 
/*
 * VegaM common golden settings.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants; every row here has 0xffffffff in the second column.
 * Unlike the tonga/polaris10/fiji common tables, it has no
 * mmPA_SC_RASTER_CONFIG rows.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 */
328 static const u32 vegam_golden_common_all[] =
329 {
330 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
332 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
333 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
334 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
335 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
336 };
337 
/*
 * Polaris11 golden register settings.
 * Flat u32 table, three values per row: an mm-prefixed register mnemonic
 * followed by two 32-bit constants.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 * NOTE(review): in the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows the third
 * column has bits set outside the second column; check how the consumer
 * treats such bits.
 */
338 static const u32 golden_settings_polaris11_a11[] =
339 {
340 	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
341 	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
342 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
343 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
344 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
345 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
346 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
347 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
348 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
350 	mmSQ_CONFIG, 0x07f80000, 0x01180000,
351 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
354 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
355 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
356 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
357 };
358 
/*
 * Polaris11 common golden settings.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants; every row here has 0xffffffff in the second column.
 * The mmGB_ADDR_CONFIG value equals POLARIS11_GB_ADDR_CONFIG_GOLDEN
 * (0x22011002).
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 */
359 static const u32 polaris11_golden_common_all[] =
360 {
361 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
363 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
364 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
365 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
366 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
367 };
368 
/*
 * Polaris10 golden register settings.
 * Flat u32 table, three values per row: an mm-prefixed register mnemonic
 * followed by two 32-bit constants.
 * Unlike the polaris11 and vegam tables, it has an mmATC_MISC_CG row and no
 * mmTCP_CHAN_STEER_LO row.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 * NOTE(review): in the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows the third
 * column has bits set outside the second column; check how the consumer
 * treats such bits.
 */
369 static const u32 golden_settings_polaris10_a11[] =
370 {
371 	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
372 	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
373 	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
374 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
375 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
376 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
377 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
378 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
379 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
380 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
381 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
382 	mmSQ_CONFIG, 0x07f80000, 0x07180000,
383 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
384 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
385 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
386 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
387 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
388 };
389 
/*
 * Polaris10 common golden settings.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants; every row here has 0xffffffff in the second column.
 * The rows carry the same registers and values as tonga_golden_common_all.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 */
390 static const u32 polaris10_golden_common_all[] =
391 {
392 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
393 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
394 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
395 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
396 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
397 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
398 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
399 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
400 };
401 
/*
 * Fiji common golden settings.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants.  mmGRBM_GFX_INDEX is written with 0xe0000000 at the start and
 * again before the final mmSPI_CONFIG_CNTL_1 row, which is the only row whose
 * second column is not 0xffffffff.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); this table's use site is not in
 * this part of the file -- confirm.
 */
402 static const u32 fiji_golden_common_all[] =
403 {
404 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
405 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
406 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
407 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
408 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
409 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
410 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
411 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
412 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
413 	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
414 };
415 
/*
 * Fiji golden register settings (named a10, unlike the a11 tables of the
 * other ASICs in this file).
 * Flat u32 table, three values per row: an mm-prefixed register mnemonic
 * followed by two 32-bit constants.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); this table's use site is not in
 * this part of the file -- confirm.
 * NOTE(review): in the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows the third
 * column has bits set outside the second column; check how the consumer
 * treats such bits.
 */
416 static const u32 golden_settings_fiji_a10[] =
417 {
418 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
419 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
420 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
421 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
422 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
423 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
428 	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
429 };
430 
/*
 * Fiji clock-gating (MGCG/CGCG) init sequence.
 * Passed, together with its ARRAY_SIZE(), to
 * amdgpu_device_program_register_sequence() in the CHIP_FIJI case of
 * gfx_v8_0_init_golden_registers().
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants.  Unlike the tonga/iceland/cz variants, it has no per-CU CGTS_CU*
 * rows after the second mmGRBM_GFX_INDEX write.
 * NOTE(review): the columns look like (register, mask, value); confirm
 * against the helper's definition.
 */
431 static const u32 fiji_mgcg_cgcg_init[] =
432 {
433 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
434 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
436 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
437 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
438 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
439 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
440 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
441 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
442 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
443 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
444 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
445 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
446 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
447 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
448 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
449 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
450 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
451 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
452 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
453 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
454 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
455 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
456 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
457 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
458 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
459 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
460 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
461 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
463 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
464 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
465 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
466 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
467 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
468 };
469 
/*
 * Iceland golden register settings.
 * Passed, together with its ARRAY_SIZE(), to
 * amdgpu_device_program_register_sequence() in the CHIP_TOPAZ case of
 * gfx_v8_0_init_golden_registers(), after iceland_mgcg_cgcg_init and before
 * iceland_golden_common_all.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants.
 * NOTE(review): the columns look like (register, mask, value); confirm
 * against the helper's definition.
 * NOTE(review): in the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows the third
 * column has bits set outside the second column; check how the helper treats
 * such bits.
 */
470 static const u32 golden_settings_iceland_a11[] =
471 {
472 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
473 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
474 	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
475 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
476 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
477 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
478 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
479 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
480 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
481 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
482 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
483 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
484 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
485 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
486 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
487 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
488 };
489 
/*
 * Iceland common golden settings.
 * Passed, together with its ARRAY_SIZE(), to
 * amdgpu_device_program_register_sequence() as the last of the three tables
 * programmed in the CHIP_TOPAZ case of gfx_v8_0_init_golden_registers().
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants; every row here has 0xffffffff in the second column.
 * The mmGB_ADDR_CONFIG value equals TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 * NOTE(review): the columns look like (register, mask, value); confirm
 * against the helper's definition.
 */
490 static const u32 iceland_golden_common_all[] =
491 {
492 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
493 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
494 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
495 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
496 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
497 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
498 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
499 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
500 };
501 
/*
 * Iceland clock-gating (MGCG/CGCG) init sequence.
 * Passed, together with its ARRAY_SIZE(), to
 * amdgpu_device_program_register_sequence() as the first of the three tables
 * programmed in the CHIP_TOPAZ case of gfx_v8_0_init_golden_registers().
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants.  Row order matters: mmGRBM_GFX_INDEX is written with 0xe0000000
 * before the CGTT_* group and again before the per-CU CGTS_* group.
 * Differences from the tonga/cz variants visible in this file: the per-CU
 * group covers only CU0..CU5, the CU0/CU4 TA_SQC rows use 0x0f840f87, the
 * CP/CPC/CPF and TCI CGTT rows use different values, and there is no trailing
 * mmCP_MEM_SLP_CNTL row.
 * NOTE(review): the columns look like (register, mask, value); confirm
 * against the helper's definition.
 */
502 static const u32 iceland_mgcg_cgcg_init[] =
503 {
504 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
505 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
506 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
507 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
508 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
509 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
510 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
511 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
512 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
513 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
514 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
515 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
516 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
517 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
518 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
519 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
520 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
521 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
522 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
523 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
524 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
525 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
526 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
527 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
528 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
529 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
530 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
531 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
534 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
538 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
543 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
548 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
553 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
558 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
563 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
566 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
567 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
568 };
569 
/*
 * Carrizo (cz) golden register settings.
 * Flat u32 table, three values per row: an mm-prefixed register mnemonic
 * followed by two 32-bit constants.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 * NOTE(review): in the mmRLC_CGCG_CGLS_CTRL, mmTCC_CTRL and mmTCP_ADDR_CONFIG
 * (0x0000000f vs 0x000000f3) rows the third column has bits set outside the
 * second column; check how the consumer treats such bits.
 */
570 static const u32 cz_golden_settings_a11[] =
571 {
572 	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
573 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
574 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
575 	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
576 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
577 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
578 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
579 	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
580 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
581 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
582 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
583 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
584 };
585 
/*
 * Carrizo (cz) common golden settings.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants; every row here has 0xffffffff in the second column.
 * The mmGB_ADDR_CONFIG value equals CARRIZO_GB_ADDR_CONFIG_GOLDEN
 * (0x22010001); the rows carry the same registers and values as
 * iceland_golden_common_all.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 */
586 static const u32 cz_golden_common_all[] =
587 {
588 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
589 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
590 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
591 	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
592 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
593 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
594 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
595 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
596 };
597 
/*
 * Carrizo (cz) clock-gating (MGCG/CGCG) init sequence.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants.  Row order matters: mmGRBM_GFX_INDEX is written with 0xe0000000
 * before the CGTT_* group and again before the per-CU CGTS_* group.
 * The per-CU group covers CU0..CU7 with five registers each; CU0 and CU4 use
 * the TA_SQC register name, the other CUs use TA.
 * Compared with tonga_mgcg_cgcg_init, the mmCGTT_CPF_CLK_CTRL value is
 * 0x00000100 and the mmRLC_CGCG_CGLS_CTRL value is 0x0020003f.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 */
598 static const u32 cz_mgcg_cgcg_init[] =
599 {
600 	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
601 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
602 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
603 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
604 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
605 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
606 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
607 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
608 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
609 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
610 	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
611 	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
612 	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
613 	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
614 	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
615 	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
616 	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
617 	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
618 	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
619 	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
620 	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
621 	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
622 	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
623 	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
624 	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
625 	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
626 	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
627 	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
628 	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629 	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
630 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
631 	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632 	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633 	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
634 	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635 	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636 	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637 	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638 	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
639 	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640 	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641 	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642 	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643 	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
644 	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645 	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646 	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647 	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648 	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
649 	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650 	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651 	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652 	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653 	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
654 	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655 	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656 	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657 	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658 	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
659 	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660 	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661 	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662 	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663 	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
664 	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665 	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666 	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
667 	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
668 	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
669 	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
670 	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
671 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
672 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
673 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
674 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
675 };
676 
/*
 * Stoney golden register settings.
 * Flat u32 table, three values per row: an mm-prefixed register mnemonic
 * followed by two 32-bit constants.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 * NOTE(review): in the mmRLC_CGCG_CGLS_CTRL, mmTCC_CTRL and mmTCP_ADDR_CONFIG
 * (0x0000000f vs 0x000000f1) rows the third column has bits set outside the
 * second column; check how the consumer treats such bits.
 */
677 static const u32 stoney_golden_settings_a11[] =
678 {
679 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
680 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
681 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
682 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
683 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
684 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
685 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
686 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
687 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
688 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
689 };
690 
/*
 * Stoney common golden settings.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants; every row here has 0xffffffff in the second column.
 * The mmGB_ADDR_CONFIG value (0x12010001) differs from every
 * *_GB_ADDR_CONFIG_GOLDEN define at the top of the file.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 */
691 static const u32 stoney_golden_common_all[] =
692 {
693 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
694 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
695 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
696 	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
697 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
698 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
699 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
700 	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
701 };
702 
/*
 * Stoney clock-gating (MGCG/CGCG) init sequence.
 * Flat u32 table, three values per row: register mnemonic, then two 32-bit
 * constants.  Much shorter than the tonga/iceland/cz/fiji variants: only
 * five rows, with no CGTT_* or per-CU CGTS_CU* rows.
 * NOTE(review): rows look like (register, mask, value) triples for
 * amdgpu_device_program_register_sequence(); the use site is not in this
 * part of the file -- confirm.
 */
703 static const u32 stoney_mgcg_cgcg_init[] =
704 {
705 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
706 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
707 	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
708 	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
709 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
710 };
711 
712 
/*
 * Human-readable descriptions of the SQ EDC error sources, in the form
 * "<symbolic name>: <explanation>".
 *
 * NOTE(review): presumably indexed by the source field decoded from
 * SQ_EDC_INFO; the consumer is not visible in this part of the file, so
 * confirm the index range before adding or reordering entries.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
722 
/* Prototypes for file-local (static) helpers that are referenced before they are defined. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
731 
732 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
733 {
734 	switch (adev->asic_type) {
735 	case CHIP_TOPAZ:
736 		amdgpu_device_program_register_sequence(adev,
737 							iceland_mgcg_cgcg_init,
738 							ARRAY_SIZE(iceland_mgcg_cgcg_init));
739 		amdgpu_device_program_register_sequence(adev,
740 							golden_settings_iceland_a11,
741 							ARRAY_SIZE(golden_settings_iceland_a11));
742 		amdgpu_device_program_register_sequence(adev,
743 							iceland_golden_common_all,
744 							ARRAY_SIZE(iceland_golden_common_all));
745 		break;
746 	case CHIP_FIJI:
747 		amdgpu_device_program_register_sequence(adev,
748 							fiji_mgcg_cgcg_init,
749 							ARRAY_SIZE(fiji_mgcg_cgcg_init));
750 		amdgpu_device_program_register_sequence(adev,
751 							golden_settings_fiji_a10,
752 							ARRAY_SIZE(golden_settings_fiji_a10));
753 		amdgpu_device_program_register_sequence(adev,
754 							fiji_golden_common_all,
755 							ARRAY_SIZE(fiji_golden_common_all));
756 		break;
757 
758 	case CHIP_TONGA:
759 		amdgpu_device_program_register_sequence(adev,
760 							tonga_mgcg_cgcg_init,
761 							ARRAY_SIZE(tonga_mgcg_cgcg_init));
762 		amdgpu_device_program_register_sequence(adev,
763 							golden_settings_tonga_a11,
764 							ARRAY_SIZE(golden_settings_tonga_a11));
765 		amdgpu_device_program_register_sequence(adev,
766 							tonga_golden_common_all,
767 							ARRAY_SIZE(tonga_golden_common_all));
768 		break;
769 	case CHIP_VEGAM:
770 		amdgpu_device_program_register_sequence(adev,
771 							golden_settings_vegam_a11,
772 							ARRAY_SIZE(golden_settings_vegam_a11));
773 		amdgpu_device_program_register_sequence(adev,
774 							vegam_golden_common_all,
775 							ARRAY_SIZE(vegam_golden_common_all));
776 		break;
777 	case CHIP_POLARIS11:
778 	case CHIP_POLARIS12:
779 		amdgpu_device_program_register_sequence(adev,
780 							golden_settings_polaris11_a11,
781 							ARRAY_SIZE(golden_settings_polaris11_a11));
782 		amdgpu_device_program_register_sequence(adev,
783 							polaris11_golden_common_all,
784 							ARRAY_SIZE(polaris11_golden_common_all));
785 		break;
786 	case CHIP_POLARIS10:
787 		amdgpu_device_program_register_sequence(adev,
788 							golden_settings_polaris10_a11,
789 							ARRAY_SIZE(golden_settings_polaris10_a11));
790 		amdgpu_device_program_register_sequence(adev,
791 							polaris10_golden_common_all,
792 							ARRAY_SIZE(polaris10_golden_common_all));
793 		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
794 		if (adev->pdev->revision == 0xc7 &&
795 		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
796 		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
797 		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
798 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
799 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
800 		}
801 		break;
802 	case CHIP_CARRIZO:
803 		amdgpu_device_program_register_sequence(adev,
804 							cz_mgcg_cgcg_init,
805 							ARRAY_SIZE(cz_mgcg_cgcg_init));
806 		amdgpu_device_program_register_sequence(adev,
807 							cz_golden_settings_a11,
808 							ARRAY_SIZE(cz_golden_settings_a11));
809 		amdgpu_device_program_register_sequence(adev,
810 							cz_golden_common_all,
811 							ARRAY_SIZE(cz_golden_common_all));
812 		break;
813 	case CHIP_STONEY:
814 		amdgpu_device_program_register_sequence(adev,
815 							stoney_mgcg_cgcg_init,
816 							ARRAY_SIZE(stoney_mgcg_cgcg_init));
817 		amdgpu_device_program_register_sequence(adev,
818 							stoney_golden_settings_a11,
819 							ARRAY_SIZE(stoney_golden_settings_a11));
820 		amdgpu_device_program_register_sequence(adev,
821 							stoney_golden_common_all,
822 							ARRAY_SIZE(stoney_golden_common_all));
823 		break;
824 	default:
825 		break;
826 	}
827 }
828 
829 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
830 {
831 	adev->gfx.scratch.num_reg = 8;
832 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
833 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
834 }
835 
836 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
837 {
838 	struct amdgpu_device *adev = ring->adev;
839 	uint32_t scratch;
840 	uint32_t tmp = 0;
841 	unsigned i;
842 	int r;
843 
844 	r = amdgpu_gfx_scratch_get(adev, &scratch);
845 	if (r)
846 		return r;
847 
848 	WREG32(scratch, 0xCAFEDEAD);
849 	r = amdgpu_ring_alloc(ring, 3);
850 	if (r)
851 		goto error_free_scratch;
852 
853 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
854 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
855 	amdgpu_ring_write(ring, 0xDEADBEEF);
856 	amdgpu_ring_commit(ring);
857 
858 	for (i = 0; i < adev->usec_timeout; i++) {
859 		tmp = RREG32(scratch);
860 		if (tmp == 0xDEADBEEF)
861 			break;
862 		udelay(1);
863 	}
864 
865 	if (i >= adev->usec_timeout)
866 		r = -ETIMEDOUT;
867 
868 error_free_scratch:
869 	amdgpu_gfx_scratch_free(adev, scratch);
870 	return r;
871 }
872 
873 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
874 {
875 	struct amdgpu_device *adev = ring->adev;
876 	struct amdgpu_ib ib;
877 	struct dma_fence *f = NULL;
878 
879 	unsigned int index;
880 	uint64_t gpu_addr;
881 	uint32_t tmp;
882 	long r;
883 
884 	r = amdgpu_device_wb_get(adev, &index);
885 	if (r)
886 		return r;
887 
888 	gpu_addr = adev->wb.gpu_addr + (index * 4);
889 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
890 	memset(&ib, 0, sizeof(ib));
891 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
892 	if (r)
893 		goto err1;
894 
895 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
896 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
897 	ib.ptr[2] = lower_32_bits(gpu_addr);
898 	ib.ptr[3] = upper_32_bits(gpu_addr);
899 	ib.ptr[4] = 0xDEADBEEF;
900 	ib.length_dw = 5;
901 
902 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
903 	if (r)
904 		goto err2;
905 
906 	r = dma_fence_wait_timeout(f, false, timeout);
907 	if (r == 0) {
908 		r = -ETIMEDOUT;
909 		goto err2;
910 	} else if (r < 0) {
911 		goto err2;
912 	}
913 
914 	tmp = adev->wb.wb[index];
915 	if (tmp == 0xDEADBEEF)
916 		r = 0;
917 	else
918 		r = -EINVAL;
919 
920 err2:
921 	amdgpu_ib_free(adev, &ib, NULL);
922 	dma_fence_put(f);
923 err1:
924 	amdgpu_device_wb_free(adev, index);
925 	return r;
926 }
927 
928 
929 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
930 {
931 	release_firmware(adev->gfx.pfp_fw);
932 	adev->gfx.pfp_fw = NULL;
933 	release_firmware(adev->gfx.me_fw);
934 	adev->gfx.me_fw = NULL;
935 	release_firmware(adev->gfx.ce_fw);
936 	adev->gfx.ce_fw = NULL;
937 	release_firmware(adev->gfx.rlc_fw);
938 	adev->gfx.rlc_fw = NULL;
939 	release_firmware(adev->gfx.mec_fw);
940 	adev->gfx.mec_fw = NULL;
941 	if ((adev->asic_type != CHIP_STONEY) &&
942 	    (adev->asic_type != CHIP_TOPAZ))
943 		release_firmware(adev->gfx.mec2_fw);
944 	adev->gfx.mec2_fw = NULL;
945 
946 	kfree(adev->gfx.rlc.register_list_format);
947 }
948 
949 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
950 {
951 	const char *chip_name;
952 	char fw_name[30];
953 	int err;
954 	struct amdgpu_firmware_info *info = NULL;
955 	const struct common_firmware_header *header = NULL;
956 	const struct gfx_firmware_header_v1_0 *cp_hdr;
957 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
958 	unsigned int *tmp = NULL, i;
959 
960 	DRM_DEBUG("\n");
961 
962 	switch (adev->asic_type) {
963 	case CHIP_TOPAZ:
964 		chip_name = "topaz";
965 		break;
966 	case CHIP_TONGA:
967 		chip_name = "tonga";
968 		break;
969 	case CHIP_CARRIZO:
970 		chip_name = "carrizo";
971 		break;
972 	case CHIP_FIJI:
973 		chip_name = "fiji";
974 		break;
975 	case CHIP_STONEY:
976 		chip_name = "stoney";
977 		break;
978 	case CHIP_POLARIS10:
979 		chip_name = "polaris10";
980 		break;
981 	case CHIP_POLARIS11:
982 		chip_name = "polaris11";
983 		break;
984 	case CHIP_POLARIS12:
985 		chip_name = "polaris12";
986 		break;
987 	case CHIP_VEGAM:
988 		chip_name = "vegam";
989 		break;
990 	default:
991 		BUG();
992 	}
993 
994 	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
995 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
996 		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
997 		if (err == -ENOENT) {
998 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
999 			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000 		}
1001 	} else {
1002 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1003 		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1004 	}
1005 	if (err)
1006 		goto out;
1007 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1008 	if (err)
1009 		goto out;
1010 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1011 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1012 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1013 
1014 	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1015 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1016 		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1017 		if (err == -ENOENT) {
1018 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019 			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020 		}
1021 	} else {
1022 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1023 		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1024 	}
1025 	if (err)
1026 		goto out;
1027 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1028 	if (err)
1029 		goto out;
1030 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1031 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1032 
1033 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1034 
1035 	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1036 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1037 		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1038 		if (err == -ENOENT) {
1039 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040 			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041 		}
1042 	} else {
1043 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1044 		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1045 	}
1046 	if (err)
1047 		goto out;
1048 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1049 	if (err)
1050 		goto out;
1051 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1052 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1053 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1054 
1055 	/*
1056 	 * Support for MCBP/Virtualization in combination with chained IBs is
1057 	 * formal released on feature version #46
1058 	 */
1059 	if (adev->gfx.ce_feature_version >= 46 &&
1060 	    adev->gfx.pfp_feature_version >= 46) {
1061 		adev->virt.chained_ib_support = true;
1062 		DRM_INFO("Chained IB support enabled!\n");
1063 	} else
1064 		adev->virt.chained_ib_support = false;
1065 
1066 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1067 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1068 	if (err)
1069 		goto out;
1070 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1071 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1072 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1073 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1074 
1075 	adev->gfx.rlc.save_and_restore_offset =
1076 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1077 	adev->gfx.rlc.clear_state_descriptor_offset =
1078 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1079 	adev->gfx.rlc.avail_scratch_ram_locations =
1080 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1081 	adev->gfx.rlc.reg_restore_list_size =
1082 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1083 	adev->gfx.rlc.reg_list_format_start =
1084 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1085 	adev->gfx.rlc.reg_list_format_separate_start =
1086 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1087 	adev->gfx.rlc.starting_offsets_start =
1088 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1089 	adev->gfx.rlc.reg_list_format_size_bytes =
1090 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1091 	adev->gfx.rlc.reg_list_size_bytes =
1092 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1093 
1094 	adev->gfx.rlc.register_list_format =
1095 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1096 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1097 
1098 	if (!adev->gfx.rlc.register_list_format) {
1099 		err = -ENOMEM;
1100 		goto out;
1101 	}
1102 
1103 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1104 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1105 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1106 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1107 
1108 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1109 
1110 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1111 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1112 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1113 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1114 
1115 	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1116 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1117 		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1118 		if (err == -ENOENT) {
1119 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120 			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121 		}
1122 	} else {
1123 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1124 		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1125 	}
1126 	if (err)
1127 		goto out;
1128 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1129 	if (err)
1130 		goto out;
1131 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1132 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1133 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1134 
1135 	if ((adev->asic_type != CHIP_STONEY) &&
1136 	    (adev->asic_type != CHIP_TOPAZ)) {
1137 		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1138 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1139 			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1140 			if (err == -ENOENT) {
1141 				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142 				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143 			}
1144 		} else {
1145 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1146 			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1147 		}
1148 		if (!err) {
1149 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1150 			if (err)
1151 				goto out;
1152 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1153 				adev->gfx.mec2_fw->data;
1154 			adev->gfx.mec2_fw_version =
1155 				le32_to_cpu(cp_hdr->header.ucode_version);
1156 			adev->gfx.mec2_feature_version =
1157 				le32_to_cpu(cp_hdr->ucode_feature_version);
1158 		} else {
1159 			err = 0;
1160 			adev->gfx.mec2_fw = NULL;
1161 		}
1162 	}
1163 
1164 	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1165 	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1166 	info->fw = adev->gfx.pfp_fw;
1167 	header = (const struct common_firmware_header *)info->fw->data;
1168 	adev->firmware.fw_size +=
1169 		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1170 
1171 	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1172 	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1173 	info->fw = adev->gfx.me_fw;
1174 	header = (const struct common_firmware_header *)info->fw->data;
1175 	adev->firmware.fw_size +=
1176 		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1177 
1178 	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1179 	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1180 	info->fw = adev->gfx.ce_fw;
1181 	header = (const struct common_firmware_header *)info->fw->data;
1182 	adev->firmware.fw_size +=
1183 		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184 
1185 	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1186 	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1187 	info->fw = adev->gfx.rlc_fw;
1188 	header = (const struct common_firmware_header *)info->fw->data;
1189 	adev->firmware.fw_size +=
1190 		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191 
1192 	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1193 	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1194 	info->fw = adev->gfx.mec_fw;
1195 	header = (const struct common_firmware_header *)info->fw->data;
1196 	adev->firmware.fw_size +=
1197 		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1198 
1199 	/* we need account JT in */
1200 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1201 	adev->firmware.fw_size +=
1202 		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1203 
1204 	if (amdgpu_sriov_vf(adev)) {
1205 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1206 		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1207 		info->fw = adev->gfx.mec_fw;
1208 		adev->firmware.fw_size +=
1209 			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1210 	}
1211 
1212 	if (adev->gfx.mec2_fw) {
1213 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1214 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1215 		info->fw = adev->gfx.mec2_fw;
1216 		header = (const struct common_firmware_header *)info->fw->data;
1217 		adev->firmware.fw_size +=
1218 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1219 	}
1220 
1221 out:
1222 	if (err) {
1223 		dev_err(adev->dev,
1224 			"gfx8: Failed to load firmware \"%s\"\n",
1225 			fw_name);
1226 		release_firmware(adev->gfx.pfp_fw);
1227 		adev->gfx.pfp_fw = NULL;
1228 		release_firmware(adev->gfx.me_fw);
1229 		adev->gfx.me_fw = NULL;
1230 		release_firmware(adev->gfx.ce_fw);
1231 		adev->gfx.ce_fw = NULL;
1232 		release_firmware(adev->gfx.rlc_fw);
1233 		adev->gfx.rlc_fw = NULL;
1234 		release_firmware(adev->gfx.mec_fw);
1235 		adev->gfx.mec_fw = NULL;
1236 		release_firmware(adev->gfx.mec2_fw);
1237 		adev->gfx.mec2_fw = NULL;
1238 	}
1239 	return err;
1240 }
1241 
1242 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1243 				    volatile u32 *buffer)
1244 {
1245 	u32 count = 0, i;
1246 	const struct cs_section_def *sect = NULL;
1247 	const struct cs_extent_def *ext = NULL;
1248 
1249 	if (adev->gfx.rlc.cs_data == NULL)
1250 		return;
1251 	if (buffer == NULL)
1252 		return;
1253 
1254 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1255 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1256 
1257 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1258 	buffer[count++] = cpu_to_le32(0x80000000);
1259 	buffer[count++] = cpu_to_le32(0x80000000);
1260 
1261 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1262 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1263 			if (sect->id == SECT_CONTEXT) {
1264 				buffer[count++] =
1265 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1266 				buffer[count++] = cpu_to_le32(ext->reg_index -
1267 						PACKET3_SET_CONTEXT_REG_START);
1268 				for (i = 0; i < ext->reg_count; i++)
1269 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1270 			} else {
1271 				return;
1272 			}
1273 		}
1274 	}
1275 
1276 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1277 	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1278 			PACKET3_SET_CONTEXT_REG_START);
1279 	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1280 	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1281 
1282 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1283 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1284 
1285 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1286 	buffer[count++] = cpu_to_le32(0);
1287 }
1288 
1289 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1290 {
1291 	if (adev->asic_type == CHIP_CARRIZO)
1292 		return 5;
1293 	else
1294 		return 4;
1295 }
1296 
1297 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1298 {
1299 	const struct cs_section_def *cs_data;
1300 	int r;
1301 
1302 	adev->gfx.rlc.cs_data = vi_cs_data;
1303 
1304 	cs_data = adev->gfx.rlc.cs_data;
1305 
1306 	if (cs_data) {
1307 		/* init clear state block */
1308 		r = amdgpu_gfx_rlc_init_csb(adev);
1309 		if (r)
1310 			return r;
1311 	}
1312 
1313 	if ((adev->asic_type == CHIP_CARRIZO) ||
1314 	    (adev->asic_type == CHIP_STONEY)) {
1315 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1316 		r = amdgpu_gfx_rlc_init_cpt(adev);
1317 		if (r)
1318 			return r;
1319 	}
1320 
1321 	return 0;
1322 }
1323 
1324 static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
1325 {
1326 	int r;
1327 
1328 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1329 	if (unlikely(r != 0))
1330 		return r;
1331 
1332 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1333 			AMDGPU_GEM_DOMAIN_VRAM);
1334 	if (!r)
1335 		adev->gfx.rlc.clear_state_gpu_addr =
1336 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1337 
1338 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1339 
1340 	return r;
1341 }
1342 
1343 static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
1344 {
1345 	int r;
1346 
1347 	if (!adev->gfx.rlc.clear_state_obj)
1348 		return;
1349 
1350 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1351 	if (likely(r == 0)) {
1352 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1353 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1354 	}
1355 }
1356 
1357 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1358 {
1359 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1360 }
1361 
1362 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1363 {
1364 	int r;
1365 	u32 *hpd;
1366 	size_t mec_hpd_size;
1367 
1368 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1369 
1370 	/* take ownership of the relevant compute queues */
1371 	amdgpu_gfx_compute_queue_acquire(adev);
1372 
1373 	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1374 
1375 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1376 				      AMDGPU_GEM_DOMAIN_VRAM,
1377 				      &adev->gfx.mec.hpd_eop_obj,
1378 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1379 				      (void **)&hpd);
1380 	if (r) {
1381 		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1382 		return r;
1383 	}
1384 
1385 	memset(hpd, 0, mec_hpd_size);
1386 
1387 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1388 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1389 
1390 	return 0;
1391 }
1392 
/*
 * Compute shader binary, one u32 per instruction dword.
 *
 * gfx_v8_0_do_edc_gpr_workarounds() copies it into its indirect buffer at
 * vgpr_offset and dispatches it with the vgpr_init_regs register state.
 *
 * NOTE(review): the name suggests it initializes the VGPRs for the EDC
 * workaround; the encoding is not decoded here.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1429 
/*
 * Compute shader binary, one u32 per instruction dword.
 *
 * gfx_v8_0_do_edc_gpr_workarounds() copies it into its indirect buffer at
 * sgpr_offset and uses that address as the shader start for the SGPR1
 * dispatch.
 *
 * NOTE(review): the name suggests it initializes the SGPRs for the EDC
 * workaround; the encoding is not decoded here.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1454 
/*
 * Register state for the VGPR init dispatch, stored as (register, value)
 * pairs.
 *
 * gfx_v8_0_do_edc_gpr_workarounds() walks the array two entries at a time and
 * emits one SET_SH_REG packet per pair, so the array length must stay even.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1475 
/*
 * Register state for the first SGPR init dispatch, stored as
 * (register, value) pairs.
 *
 * gfx_v8_0_do_edc_gpr_workarounds() walks the array two entries at a time and
 * emits one SET_SH_REG packet per pair, so the array length must stay even.
 * Differs from sgpr2_init_regs only in the
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0x0f here, 0xf0 there).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1496 
/*
 * Register state for the second SGPR init dispatch, stored as
 * (register, value) pairs.
 *
 * gfx_v8_0_do_edc_gpr_workarounds() walks the array two entries at a time and
 * emits one SET_SH_REG packet per pair, so the array length must stay even.
 * Differs from sgpr1_init_regs only in the
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0xf0 here, 0x0f there).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1517 
/*
 * List of EDC counter/info register offsets, one per entry.
 *
 * NOTE(review): presumably the SEC/DED counters that the EDC GPR workaround
 * reads back after its dispatches; the consumer is not visible in this part
 * of the file, so confirm before changing the list.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1546 
1547 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1548 {
1549 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1550 	struct amdgpu_ib ib;
1551 	struct dma_fence *f = NULL;
1552 	int r, i;
1553 	u32 tmp;
1554 	unsigned total_size, vgpr_offset, sgpr_offset;
1555 	u64 gpu_addr;
1556 
1557 	/* only supported on CZ */
1558 	if (adev->asic_type != CHIP_CARRIZO)
1559 		return 0;
1560 
1561 	/* bail if the compute ring is not ready */
1562 	if (!ring->sched.ready)
1563 		return 0;
1564 
1565 	tmp = RREG32(mmGB_EDC_MODE);
1566 	WREG32(mmGB_EDC_MODE, 0);
1567 
1568 	total_size =
1569 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1570 	total_size +=
1571 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1572 	total_size +=
1573 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1574 	total_size = ALIGN(total_size, 256);
1575 	vgpr_offset = total_size;
1576 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1577 	sgpr_offset = total_size;
1578 	total_size += sizeof(sgpr_init_compute_shader);
1579 
1580 	/* allocate an indirect buffer to put the commands in */
1581 	memset(&ib, 0, sizeof(ib));
1582 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1583 	if (r) {
1584 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1585 		return r;
1586 	}
1587 
1588 	/* load the compute shaders */
1589 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1590 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1591 
1592 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1593 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1594 
1595 	/* init the ib length to 0 */
1596 	ib.length_dw = 0;
1597 
1598 	/* VGPR */
1599 	/* write the register state for the compute dispatch */
1600 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1601 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1602 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1603 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1604 	}
1605 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1606 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1607 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1608 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1609 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1610 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1611 
1612 	/* write dispatch packet */
1613 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1614 	ib.ptr[ib.length_dw++] = 8; /* x */
1615 	ib.ptr[ib.length_dw++] = 1; /* y */
1616 	ib.ptr[ib.length_dw++] = 1; /* z */
1617 	ib.ptr[ib.length_dw++] =
1618 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1619 
1620 	/* write CS partial flush packet */
1621 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1622 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1623 
1624 	/* SGPR1 */
1625 	/* write the register state for the compute dispatch */
1626 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1627 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1628 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1629 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1630 	}
1631 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1632 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1633 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1634 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1635 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1636 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1637 
1638 	/* write dispatch packet */
1639 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1640 	ib.ptr[ib.length_dw++] = 8; /* x */
1641 	ib.ptr[ib.length_dw++] = 1; /* y */
1642 	ib.ptr[ib.length_dw++] = 1; /* z */
1643 	ib.ptr[ib.length_dw++] =
1644 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1645 
1646 	/* write CS partial flush packet */
1647 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1648 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1649 
1650 	/* SGPR2 */
1651 	/* write the register state for the compute dispatch */
1652 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1653 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1654 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1655 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1656 	}
1657 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1658 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1659 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1660 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1661 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1662 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1663 
1664 	/* write dispatch packet */
1665 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1666 	ib.ptr[ib.length_dw++] = 8; /* x */
1667 	ib.ptr[ib.length_dw++] = 1; /* y */
1668 	ib.ptr[ib.length_dw++] = 1; /* z */
1669 	ib.ptr[ib.length_dw++] =
1670 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1671 
1672 	/* write CS partial flush packet */
1673 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1674 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1675 
1676 	/* schedule the ib on the ring */
1677 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1678 	if (r) {
1679 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1680 		goto fail;
1681 	}
1682 
1683 	/* wait for the GPU to finish processing the IB */
1684 	r = dma_fence_wait(f, false);
1685 	if (r) {
1686 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1687 		goto fail;
1688 	}
1689 
1690 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1691 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1692 	WREG32(mmGB_EDC_MODE, tmp);
1693 
1694 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1695 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1696 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1697 
1698 
1699 	/* read back registers to clear the counters */
1700 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1701 		RREG32(sec_ded_counter_registers[i]);
1702 
1703 fail:
1704 	amdgpu_ib_free(adev, &ib, NULL);
1705 	dma_fence_put(f);
1706 
1707 	return r;
1708 }
1709 
1710 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1711 {
1712 	u32 gb_addr_config;
1713 	u32 mc_shared_chmap, mc_arb_ramcfg;
1714 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1715 	u32 tmp;
1716 	int ret;
1717 
1718 	switch (adev->asic_type) {
1719 	case CHIP_TOPAZ:
1720 		adev->gfx.config.max_shader_engines = 1;
1721 		adev->gfx.config.max_tile_pipes = 2;
1722 		adev->gfx.config.max_cu_per_sh = 6;
1723 		adev->gfx.config.max_sh_per_se = 1;
1724 		adev->gfx.config.max_backends_per_se = 2;
1725 		adev->gfx.config.max_texture_channel_caches = 2;
1726 		adev->gfx.config.max_gprs = 256;
1727 		adev->gfx.config.max_gs_threads = 32;
1728 		adev->gfx.config.max_hw_contexts = 8;
1729 
1730 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1731 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1732 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1733 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1734 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1735 		break;
1736 	case CHIP_FIJI:
1737 		adev->gfx.config.max_shader_engines = 4;
1738 		adev->gfx.config.max_tile_pipes = 16;
1739 		adev->gfx.config.max_cu_per_sh = 16;
1740 		adev->gfx.config.max_sh_per_se = 1;
1741 		adev->gfx.config.max_backends_per_se = 4;
1742 		adev->gfx.config.max_texture_channel_caches = 16;
1743 		adev->gfx.config.max_gprs = 256;
1744 		adev->gfx.config.max_gs_threads = 32;
1745 		adev->gfx.config.max_hw_contexts = 8;
1746 
1747 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1748 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1749 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1750 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1751 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1752 		break;
1753 	case CHIP_POLARIS11:
1754 	case CHIP_POLARIS12:
1755 		ret = amdgpu_atombios_get_gfx_info(adev);
1756 		if (ret)
1757 			return ret;
1758 		adev->gfx.config.max_gprs = 256;
1759 		adev->gfx.config.max_gs_threads = 32;
1760 		adev->gfx.config.max_hw_contexts = 8;
1761 
1762 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1763 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1764 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1765 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1766 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1767 		break;
1768 	case CHIP_POLARIS10:
1769 	case CHIP_VEGAM:
1770 		ret = amdgpu_atombios_get_gfx_info(adev);
1771 		if (ret)
1772 			return ret;
1773 		adev->gfx.config.max_gprs = 256;
1774 		adev->gfx.config.max_gs_threads = 32;
1775 		adev->gfx.config.max_hw_contexts = 8;
1776 
1777 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1782 		break;
1783 	case CHIP_TONGA:
1784 		adev->gfx.config.max_shader_engines = 4;
1785 		adev->gfx.config.max_tile_pipes = 8;
1786 		adev->gfx.config.max_cu_per_sh = 8;
1787 		adev->gfx.config.max_sh_per_se = 1;
1788 		adev->gfx.config.max_backends_per_se = 2;
1789 		adev->gfx.config.max_texture_channel_caches = 8;
1790 		adev->gfx.config.max_gprs = 256;
1791 		adev->gfx.config.max_gs_threads = 32;
1792 		adev->gfx.config.max_hw_contexts = 8;
1793 
1794 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1799 		break;
1800 	case CHIP_CARRIZO:
1801 		adev->gfx.config.max_shader_engines = 1;
1802 		adev->gfx.config.max_tile_pipes = 2;
1803 		adev->gfx.config.max_sh_per_se = 1;
1804 		adev->gfx.config.max_backends_per_se = 2;
1805 		adev->gfx.config.max_cu_per_sh = 8;
1806 		adev->gfx.config.max_texture_channel_caches = 2;
1807 		adev->gfx.config.max_gprs = 256;
1808 		adev->gfx.config.max_gs_threads = 32;
1809 		adev->gfx.config.max_hw_contexts = 8;
1810 
1811 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1816 		break;
1817 	case CHIP_STONEY:
1818 		adev->gfx.config.max_shader_engines = 1;
1819 		adev->gfx.config.max_tile_pipes = 2;
1820 		adev->gfx.config.max_sh_per_se = 1;
1821 		adev->gfx.config.max_backends_per_se = 1;
1822 		adev->gfx.config.max_cu_per_sh = 3;
1823 		adev->gfx.config.max_texture_channel_caches = 2;
1824 		adev->gfx.config.max_gprs = 256;
1825 		adev->gfx.config.max_gs_threads = 16;
1826 		adev->gfx.config.max_hw_contexts = 8;
1827 
1828 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1833 		break;
1834 	default:
1835 		adev->gfx.config.max_shader_engines = 2;
1836 		adev->gfx.config.max_tile_pipes = 4;
1837 		adev->gfx.config.max_cu_per_sh = 2;
1838 		adev->gfx.config.max_sh_per_se = 1;
1839 		adev->gfx.config.max_backends_per_se = 2;
1840 		adev->gfx.config.max_texture_channel_caches = 4;
1841 		adev->gfx.config.max_gprs = 256;
1842 		adev->gfx.config.max_gs_threads = 32;
1843 		adev->gfx.config.max_hw_contexts = 8;
1844 
1845 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1850 		break;
1851 	}
1852 
1853 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1854 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1855 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1856 
1857 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1858 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1859 	if (adev->flags & AMD_IS_APU) {
1860 		/* Get memory bank mapping mode. */
1861 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1862 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1863 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1864 
1865 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1866 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1867 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1868 
1869 		/* Validate settings in case only one DIMM installed. */
1870 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1871 			dimm00_addr_map = 0;
1872 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1873 			dimm01_addr_map = 0;
1874 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1875 			dimm10_addr_map = 0;
1876 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1877 			dimm11_addr_map = 0;
1878 
1879 		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1880 		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1881 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1882 			adev->gfx.config.mem_row_size_in_kb = 2;
1883 		else
1884 			adev->gfx.config.mem_row_size_in_kb = 1;
1885 	} else {
1886 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1887 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1888 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1889 			adev->gfx.config.mem_row_size_in_kb = 4;
1890 	}
1891 
1892 	adev->gfx.config.shader_engine_tile_size = 32;
1893 	adev->gfx.config.num_gpus = 1;
1894 	adev->gfx.config.multi_gpu_tile_size = 64;
1895 
1896 	/* fix up row size */
1897 	switch (adev->gfx.config.mem_row_size_in_kb) {
1898 	case 1:
1899 	default:
1900 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1901 		break;
1902 	case 2:
1903 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1904 		break;
1905 	case 4:
1906 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1907 		break;
1908 	}
1909 	adev->gfx.config.gb_addr_config = gb_addr_config;
1910 
1911 	return 0;
1912 }
1913 
1914 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1915 					int mec, int pipe, int queue)
1916 {
1917 	int r;
1918 	unsigned irq_type;
1919 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1920 
1921 	ring = &adev->gfx.compute_ring[ring_id];
1922 
1923 	/* mec0 is me1 */
1924 	ring->me = mec + 1;
1925 	ring->pipe = pipe;
1926 	ring->queue = queue;
1927 
1928 	ring->ring_obj = NULL;
1929 	ring->use_doorbell = true;
1930 	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1931 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1932 				+ (ring_id * GFX8_MEC_HPD_SIZE);
1933 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1934 
1935 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1936 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1937 		+ ring->pipe;
1938 
1939 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1940 	r = amdgpu_ring_init(adev, ring, 1024,
1941 			&adev->gfx.eop_irq, irq_type);
1942 	if (r)
1943 		return r;
1944 
1945 
1946 	return 0;
1947 }
1948 
1949 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1950 
1951 static int gfx_v8_0_sw_init(void *handle)
1952 {
1953 	int i, j, k, r, ring_id;
1954 	struct amdgpu_ring *ring;
1955 	struct amdgpu_kiq *kiq;
1956 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1957 
1958 	switch (adev->asic_type) {
1959 	case CHIP_TONGA:
1960 	case CHIP_CARRIZO:
1961 	case CHIP_FIJI:
1962 	case CHIP_POLARIS10:
1963 	case CHIP_POLARIS11:
1964 	case CHIP_POLARIS12:
1965 	case CHIP_VEGAM:
1966 		adev->gfx.mec.num_mec = 2;
1967 		break;
1968 	case CHIP_TOPAZ:
1969 	case CHIP_STONEY:
1970 	default:
1971 		adev->gfx.mec.num_mec = 1;
1972 		break;
1973 	}
1974 
1975 	adev->gfx.mec.num_pipe_per_mec = 4;
1976 	adev->gfx.mec.num_queue_per_pipe = 8;
1977 
1978 	/* EOP Event */
1979 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1980 	if (r)
1981 		return r;
1982 
1983 	/* Privileged reg */
1984 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1985 			      &adev->gfx.priv_reg_irq);
1986 	if (r)
1987 		return r;
1988 
1989 	/* Privileged inst */
1990 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1991 			      &adev->gfx.priv_inst_irq);
1992 	if (r)
1993 		return r;
1994 
1995 	/* Add CP EDC/ECC irq  */
1996 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1997 			      &adev->gfx.cp_ecc_error_irq);
1998 	if (r)
1999 		return r;
2000 
2001 	/* SQ interrupts. */
2002 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
2003 			      &adev->gfx.sq_irq);
2004 	if (r) {
2005 		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
2006 		return r;
2007 	}
2008 
2009 	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2010 
2011 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2012 
2013 	gfx_v8_0_scratch_init(adev);
2014 
2015 	r = gfx_v8_0_init_microcode(adev);
2016 	if (r) {
2017 		DRM_ERROR("Failed to load gfx firmware!\n");
2018 		return r;
2019 	}
2020 
2021 	r = adev->gfx.rlc.funcs->init(adev);
2022 	if (r) {
2023 		DRM_ERROR("Failed to init rlc BOs!\n");
2024 		return r;
2025 	}
2026 
2027 	r = gfx_v8_0_mec_init(adev);
2028 	if (r) {
2029 		DRM_ERROR("Failed to init MEC BOs!\n");
2030 		return r;
2031 	}
2032 
2033 	/* set up the gfx ring */
2034 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2035 		ring = &adev->gfx.gfx_ring[i];
2036 		ring->ring_obj = NULL;
2037 		sprintf(ring->name, "gfx");
2038 		/* no gfx doorbells on iceland */
2039 		if (adev->asic_type != CHIP_TOPAZ) {
2040 			ring->use_doorbell = true;
2041 			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2042 		}
2043 
2044 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2045 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2046 		if (r)
2047 			return r;
2048 	}
2049 
2050 
2051 	/* set up the compute queues - allocate horizontally across pipes */
2052 	ring_id = 0;
2053 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2054 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2055 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2056 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2057 					continue;
2058 
2059 				r = gfx_v8_0_compute_ring_init(adev,
2060 								ring_id,
2061 								i, k, j);
2062 				if (r)
2063 					return r;
2064 
2065 				ring_id++;
2066 			}
2067 		}
2068 	}
2069 
2070 	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2071 	if (r) {
2072 		DRM_ERROR("Failed to init KIQ BOs!\n");
2073 		return r;
2074 	}
2075 
2076 	kiq = &adev->gfx.kiq;
2077 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2078 	if (r)
2079 		return r;
2080 
2081 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2082 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2083 	if (r)
2084 		return r;
2085 
2086 	adev->gfx.ce_ram_size = 0x8000;
2087 
2088 	r = gfx_v8_0_gpu_early_init(adev);
2089 	if (r)
2090 		return r;
2091 
2092 	return 0;
2093 }
2094 
2095 static int gfx_v8_0_sw_fini(void *handle)
2096 {
2097 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2098 	int i;
2099 
2100 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2101 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2102 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2103 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2104 
2105 	amdgpu_gfx_mqd_sw_fini(adev);
2106 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2107 	amdgpu_gfx_kiq_fini(adev);
2108 
2109 	gfx_v8_0_mec_fini(adev);
2110 	amdgpu_gfx_rlc_fini(adev);
2111 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2112 				&adev->gfx.rlc.clear_state_gpu_addr,
2113 				(void **)&adev->gfx.rlc.cs_ptr);
2114 	if ((adev->asic_type == CHIP_CARRIZO) ||
2115 	    (adev->asic_type == CHIP_STONEY)) {
2116 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2117 				&adev->gfx.rlc.cp_table_gpu_addr,
2118 				(void **)&adev->gfx.rlc.cp_table_ptr);
2119 	}
2120 	gfx_v8_0_free_microcode(adev);
2121 
2122 	return 0;
2123 }
2124 
2125 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2126 {
2127 	uint32_t *modearray, *mod2array;
2128 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2129 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2130 	u32 reg_offset;
2131 
2132 	modearray = adev->gfx.config.tile_mode_array;
2133 	mod2array = adev->gfx.config.macrotile_mode_array;
2134 
2135 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2136 		modearray[reg_offset] = 0;
2137 
2138 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2139 		mod2array[reg_offset] = 0;
2140 
2141 	switch (adev->asic_type) {
2142 	case CHIP_TOPAZ:
2143 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2144 				PIPE_CONFIG(ADDR_SURF_P2) |
2145 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2146 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2147 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148 				PIPE_CONFIG(ADDR_SURF_P2) |
2149 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2150 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152 				PIPE_CONFIG(ADDR_SURF_P2) |
2153 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2154 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2155 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156 				PIPE_CONFIG(ADDR_SURF_P2) |
2157 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2158 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160 				PIPE_CONFIG(ADDR_SURF_P2) |
2161 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2162 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2164 				PIPE_CONFIG(ADDR_SURF_P2) |
2165 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2166 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2167 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2168 				PIPE_CONFIG(ADDR_SURF_P2) |
2169 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2170 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2171 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2172 				PIPE_CONFIG(ADDR_SURF_P2));
2173 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2174 				PIPE_CONFIG(ADDR_SURF_P2) |
2175 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2176 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2177 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2178 				 PIPE_CONFIG(ADDR_SURF_P2) |
2179 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2180 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2182 				 PIPE_CONFIG(ADDR_SURF_P2) |
2183 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2184 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2185 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2186 				 PIPE_CONFIG(ADDR_SURF_P2) |
2187 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190 				 PIPE_CONFIG(ADDR_SURF_P2) |
2191 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2194 				 PIPE_CONFIG(ADDR_SURF_P2) |
2195 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2196 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2198 				 PIPE_CONFIG(ADDR_SURF_P2) |
2199 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2201 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2202 				 PIPE_CONFIG(ADDR_SURF_P2) |
2203 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2205 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2206 				 PIPE_CONFIG(ADDR_SURF_P2) |
2207 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2208 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2210 				 PIPE_CONFIG(ADDR_SURF_P2) |
2211 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2212 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2213 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2214 				 PIPE_CONFIG(ADDR_SURF_P2) |
2215 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2216 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2218 				 PIPE_CONFIG(ADDR_SURF_P2) |
2219 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2222 				 PIPE_CONFIG(ADDR_SURF_P2) |
2223 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2224 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2226 				 PIPE_CONFIG(ADDR_SURF_P2) |
2227 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2228 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2229 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2230 				 PIPE_CONFIG(ADDR_SURF_P2) |
2231 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2232 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2233 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2234 				 PIPE_CONFIG(ADDR_SURF_P2) |
2235 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2236 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2237 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2238 				 PIPE_CONFIG(ADDR_SURF_P2) |
2239 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2240 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2241 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2242 				 PIPE_CONFIG(ADDR_SURF_P2) |
2243 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2244 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2245 
2246 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2247 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2248 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249 				NUM_BANKS(ADDR_SURF_8_BANK));
2250 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2251 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2252 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2253 				NUM_BANKS(ADDR_SURF_8_BANK));
2254 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2255 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257 				NUM_BANKS(ADDR_SURF_8_BANK));
2258 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261 				NUM_BANKS(ADDR_SURF_8_BANK));
2262 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2264 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265 				NUM_BANKS(ADDR_SURF_8_BANK));
2266 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269 				NUM_BANKS(ADDR_SURF_8_BANK));
2270 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273 				NUM_BANKS(ADDR_SURF_8_BANK));
2274 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2275 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2276 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2277 				NUM_BANKS(ADDR_SURF_16_BANK));
2278 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2279 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2280 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281 				NUM_BANKS(ADDR_SURF_16_BANK));
2282 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2283 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2284 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285 				 NUM_BANKS(ADDR_SURF_16_BANK));
2286 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2287 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2288 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289 				 NUM_BANKS(ADDR_SURF_16_BANK));
2290 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2291 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2292 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293 				 NUM_BANKS(ADDR_SURF_16_BANK));
2294 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2296 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297 				 NUM_BANKS(ADDR_SURF_16_BANK));
2298 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2300 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2301 				 NUM_BANKS(ADDR_SURF_8_BANK));
2302 
2303 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2304 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2305 			    reg_offset != 23)
2306 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2307 
2308 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2309 			if (reg_offset != 7)
2310 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2311 
2312 		break;
2313 	case CHIP_FIJI:
2314 	case CHIP_VEGAM:
2315 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2318 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2322 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2326 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2330 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2334 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2336 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2340 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2348 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2349 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2361 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2365 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2374 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2381 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2383 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2385 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2386 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2390 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2394 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2398 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2402 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2406 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2407 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2410 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2412 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2414 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2418 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2422 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2426 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2430 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2433 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2435 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2437 
2438 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441 				NUM_BANKS(ADDR_SURF_8_BANK));
2442 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445 				NUM_BANKS(ADDR_SURF_8_BANK));
2446 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449 				NUM_BANKS(ADDR_SURF_8_BANK));
2450 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453 				NUM_BANKS(ADDR_SURF_8_BANK));
2454 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2456 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457 				NUM_BANKS(ADDR_SURF_8_BANK));
2458 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461 				NUM_BANKS(ADDR_SURF_8_BANK));
2462 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465 				NUM_BANKS(ADDR_SURF_8_BANK));
2466 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2468 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2469 				NUM_BANKS(ADDR_SURF_8_BANK));
2470 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2472 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2473 				NUM_BANKS(ADDR_SURF_8_BANK));
2474 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2476 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477 				 NUM_BANKS(ADDR_SURF_8_BANK));
2478 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481 				 NUM_BANKS(ADDR_SURF_8_BANK));
2482 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2485 				 NUM_BANKS(ADDR_SURF_8_BANK));
2486 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489 				 NUM_BANKS(ADDR_SURF_8_BANK));
2490 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493 				 NUM_BANKS(ADDR_SURF_4_BANK));
2494 
2495 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2496 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2497 
2498 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2499 			if (reg_offset != 7)
2500 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2501 
2502 		break;
2503 	case CHIP_TONGA:
2504 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2507 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2508 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2511 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2515 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2519 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2523 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2525 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2527 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2529 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2531 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2537 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2538 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2539 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2541 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2542 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2547 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2549 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2550 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2551 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2552 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2554 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2558 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2559 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2562 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2563 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2567 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2570 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2572 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2574 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2575 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2578 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2579 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2581 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2583 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2587 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2591 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2595 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2596 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2599 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2601 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2603 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2605 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2607 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2609 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2611 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2619 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2622 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2623 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2624 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2626 
2627 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2630 				NUM_BANKS(ADDR_SURF_16_BANK));
2631 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634 				NUM_BANKS(ADDR_SURF_16_BANK));
2635 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638 				NUM_BANKS(ADDR_SURF_16_BANK));
2639 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642 				NUM_BANKS(ADDR_SURF_16_BANK));
2643 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646 				NUM_BANKS(ADDR_SURF_16_BANK));
2647 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650 				NUM_BANKS(ADDR_SURF_16_BANK));
2651 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654 				NUM_BANKS(ADDR_SURF_16_BANK));
2655 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2657 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2658 				NUM_BANKS(ADDR_SURF_16_BANK));
2659 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2662 				NUM_BANKS(ADDR_SURF_16_BANK));
2663 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2665 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2666 				 NUM_BANKS(ADDR_SURF_16_BANK));
2667 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2670 				 NUM_BANKS(ADDR_SURF_16_BANK));
2671 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2674 				 NUM_BANKS(ADDR_SURF_8_BANK));
2675 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2678 				 NUM_BANKS(ADDR_SURF_4_BANK));
2679 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2682 				 NUM_BANKS(ADDR_SURF_4_BANK));
2683 
2684 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2685 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2686 
2687 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2688 			if (reg_offset != 7)
2689 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2690 
2691 		break;
2692 	case CHIP_POLARIS11:
2693 	case CHIP_POLARIS12:
2694 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2697 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2698 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2701 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2702 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2706 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2709 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2710 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2713 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2714 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2715 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2717 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2718 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2719 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2721 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2722 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2725 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2726 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2727 				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2728 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2729 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2731 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2732 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2735 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2737 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2739 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2740 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2743 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2744 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2747 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2748 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2751 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2752 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2753 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2755 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2756 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2757 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2759 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2760 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2761 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2763 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2764 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2765 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2768 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2769 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2771 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2772 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2773 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2775 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2777 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2779 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2780 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2781 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2783 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2784 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2785 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2787 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2788 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2789 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2791 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2792 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2793 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2795 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2796 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2797 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2799 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2800 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2803 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2804 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2805 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2807 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2809 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2810 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2811 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2812 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2813 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2815 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2816 
2817 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820 				NUM_BANKS(ADDR_SURF_16_BANK));
2821 
2822 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2824 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2825 				NUM_BANKS(ADDR_SURF_16_BANK));
2826 
2827 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2829 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830 				NUM_BANKS(ADDR_SURF_16_BANK));
2831 
2832 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2834 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 				NUM_BANKS(ADDR_SURF_16_BANK));
2836 
2837 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840 				NUM_BANKS(ADDR_SURF_16_BANK));
2841 
2842 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2844 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2845 				NUM_BANKS(ADDR_SURF_16_BANK));
2846 
2847 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850 				NUM_BANKS(ADDR_SURF_16_BANK));
2851 
2852 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2853 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2854 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855 				NUM_BANKS(ADDR_SURF_16_BANK));
2856 
2857 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2858 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2859 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860 				NUM_BANKS(ADDR_SURF_16_BANK));
2861 
2862 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2865 				NUM_BANKS(ADDR_SURF_16_BANK));
2866 
2867 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2869 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2870 				NUM_BANKS(ADDR_SURF_16_BANK));
2871 
2872 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2875 				NUM_BANKS(ADDR_SURF_16_BANK));
2876 
2877 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2879 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2880 				NUM_BANKS(ADDR_SURF_8_BANK));
2881 
2882 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2885 				NUM_BANKS(ADDR_SURF_4_BANK));
2886 
2887 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2888 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2889 
2890 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2891 			if (reg_offset != 7)
2892 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2893 
2894 		break;
2895 	case CHIP_POLARIS10:
2896 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2899 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2903 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2907 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2911 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2915 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2929 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2930 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2931 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2932 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2933 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2935 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2942 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2944 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2947 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2948 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2955 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2959 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2962 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2964 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2967 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2968 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2970 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2973 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2975 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2979 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2983 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2988 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2991 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2992 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2993 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2995 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2997 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2999 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3005 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3006 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3007 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3011 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3014 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3016 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3018 
3019 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3021 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022 				NUM_BANKS(ADDR_SURF_16_BANK));
3023 
3024 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 				NUM_BANKS(ADDR_SURF_16_BANK));
3028 
3029 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032 				NUM_BANKS(ADDR_SURF_16_BANK));
3033 
3034 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037 				NUM_BANKS(ADDR_SURF_16_BANK));
3038 
3039 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042 				NUM_BANKS(ADDR_SURF_16_BANK));
3043 
3044 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3047 				NUM_BANKS(ADDR_SURF_16_BANK));
3048 
3049 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052 				NUM_BANKS(ADDR_SURF_16_BANK));
3053 
3054 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3056 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3057 				NUM_BANKS(ADDR_SURF_16_BANK));
3058 
3059 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3061 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3062 				NUM_BANKS(ADDR_SURF_16_BANK));
3063 
3064 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3066 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3067 				NUM_BANKS(ADDR_SURF_16_BANK));
3068 
3069 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3071 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072 				NUM_BANKS(ADDR_SURF_16_BANK));
3073 
3074 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3077 				NUM_BANKS(ADDR_SURF_8_BANK));
3078 
3079 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3082 				NUM_BANKS(ADDR_SURF_4_BANK));
3083 
3084 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087 				NUM_BANKS(ADDR_SURF_4_BANK));
3088 
3089 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3090 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3091 
3092 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3093 			if (reg_offset != 7)
3094 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3095 
3096 		break;
3097 	case CHIP_STONEY:
3098 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3099 				PIPE_CONFIG(ADDR_SURF_P2) |
3100 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3101 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3102 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103 				PIPE_CONFIG(ADDR_SURF_P2) |
3104 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3105 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107 				PIPE_CONFIG(ADDR_SURF_P2) |
3108 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3109 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111 				PIPE_CONFIG(ADDR_SURF_P2) |
3112 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3113 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115 				PIPE_CONFIG(ADDR_SURF_P2) |
3116 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3117 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3119 				PIPE_CONFIG(ADDR_SURF_P2) |
3120 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123 				PIPE_CONFIG(ADDR_SURF_P2) |
3124 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3127 				PIPE_CONFIG(ADDR_SURF_P2));
3128 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3129 				PIPE_CONFIG(ADDR_SURF_P2) |
3130 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3131 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3132 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3133 				 PIPE_CONFIG(ADDR_SURF_P2) |
3134 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3137 				 PIPE_CONFIG(ADDR_SURF_P2) |
3138 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3140 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3141 				 PIPE_CONFIG(ADDR_SURF_P2) |
3142 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3143 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3144 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3145 				 PIPE_CONFIG(ADDR_SURF_P2) |
3146 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3149 				 PIPE_CONFIG(ADDR_SURF_P2) |
3150 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3153 				 PIPE_CONFIG(ADDR_SURF_P2) |
3154 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3156 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3157 				 PIPE_CONFIG(ADDR_SURF_P2) |
3158 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3160 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161 				 PIPE_CONFIG(ADDR_SURF_P2) |
3162 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3163 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3165 				 PIPE_CONFIG(ADDR_SURF_P2) |
3166 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3169 				 PIPE_CONFIG(ADDR_SURF_P2) |
3170 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3173 				 PIPE_CONFIG(ADDR_SURF_P2) |
3174 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3177 				 PIPE_CONFIG(ADDR_SURF_P2) |
3178 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3179 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3181 				 PIPE_CONFIG(ADDR_SURF_P2) |
3182 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3183 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3185 				 PIPE_CONFIG(ADDR_SURF_P2) |
3186 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3189 				 PIPE_CONFIG(ADDR_SURF_P2) |
3190 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3191 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193 				 PIPE_CONFIG(ADDR_SURF_P2) |
3194 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3197 				 PIPE_CONFIG(ADDR_SURF_P2) |
3198 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3200 
3201 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3202 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3203 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3204 				NUM_BANKS(ADDR_SURF_8_BANK));
3205 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3207 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208 				NUM_BANKS(ADDR_SURF_8_BANK));
3209 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3211 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3212 				NUM_BANKS(ADDR_SURF_8_BANK));
3213 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216 				NUM_BANKS(ADDR_SURF_8_BANK));
3217 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220 				NUM_BANKS(ADDR_SURF_8_BANK));
3221 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224 				NUM_BANKS(ADDR_SURF_8_BANK));
3225 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228 				NUM_BANKS(ADDR_SURF_8_BANK));
3229 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3230 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3231 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3232 				NUM_BANKS(ADDR_SURF_16_BANK));
3233 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3235 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236 				NUM_BANKS(ADDR_SURF_16_BANK));
3237 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3238 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240 				 NUM_BANKS(ADDR_SURF_16_BANK));
3241 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3243 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244 				 NUM_BANKS(ADDR_SURF_16_BANK));
3245 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3246 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248 				 NUM_BANKS(ADDR_SURF_16_BANK));
3249 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3251 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252 				 NUM_BANKS(ADDR_SURF_16_BANK));
3253 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3256 				 NUM_BANKS(ADDR_SURF_8_BANK));
3257 
3258 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3259 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3260 			    reg_offset != 23)
3261 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3262 
3263 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3264 			if (reg_offset != 7)
3265 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3266 
3267 		break;
3268 	default:
3269 		dev_warn(adev->dev,
3270 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3271 			 adev->asic_type);
3272 		/* fall through */
3273 
3274 	case CHIP_CARRIZO:
3275 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3276 				PIPE_CONFIG(ADDR_SURF_P2) |
3277 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3278 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3279 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280 				PIPE_CONFIG(ADDR_SURF_P2) |
3281 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3282 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3283 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284 				PIPE_CONFIG(ADDR_SURF_P2) |
3285 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3286 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288 				PIPE_CONFIG(ADDR_SURF_P2) |
3289 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3290 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292 				PIPE_CONFIG(ADDR_SURF_P2) |
3293 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3294 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3296 				PIPE_CONFIG(ADDR_SURF_P2) |
3297 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3298 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3300 				PIPE_CONFIG(ADDR_SURF_P2) |
3301 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3304 				PIPE_CONFIG(ADDR_SURF_P2));
3305 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3306 				PIPE_CONFIG(ADDR_SURF_P2) |
3307 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3308 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3309 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3310 				 PIPE_CONFIG(ADDR_SURF_P2) |
3311 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3312 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3313 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3314 				 PIPE_CONFIG(ADDR_SURF_P2) |
3315 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3317 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3318 				 PIPE_CONFIG(ADDR_SURF_P2) |
3319 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3320 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3321 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3322 				 PIPE_CONFIG(ADDR_SURF_P2) |
3323 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3324 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3325 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3326 				 PIPE_CONFIG(ADDR_SURF_P2) |
3327 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3330 				 PIPE_CONFIG(ADDR_SURF_P2) |
3331 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3333 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3334 				 PIPE_CONFIG(ADDR_SURF_P2) |
3335 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3337 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3338 				 PIPE_CONFIG(ADDR_SURF_P2) |
3339 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3340 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3342 				 PIPE_CONFIG(ADDR_SURF_P2) |
3343 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3344 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3346 				 PIPE_CONFIG(ADDR_SURF_P2) |
3347 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3350 				 PIPE_CONFIG(ADDR_SURF_P2) |
3351 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3354 				 PIPE_CONFIG(ADDR_SURF_P2) |
3355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3358 				 PIPE_CONFIG(ADDR_SURF_P2) |
3359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3362 				 PIPE_CONFIG(ADDR_SURF_P2) |
3363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3366 				 PIPE_CONFIG(ADDR_SURF_P2) |
3367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3369 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3370 				 PIPE_CONFIG(ADDR_SURF_P2) |
3371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3373 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3374 				 PIPE_CONFIG(ADDR_SURF_P2) |
3375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3377 
3378 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3379 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3380 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3381 				NUM_BANKS(ADDR_SURF_8_BANK));
3382 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3384 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3385 				NUM_BANKS(ADDR_SURF_8_BANK));
3386 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3388 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3389 				NUM_BANKS(ADDR_SURF_8_BANK));
3390 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3392 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3393 				NUM_BANKS(ADDR_SURF_8_BANK));
3394 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397 				NUM_BANKS(ADDR_SURF_8_BANK));
3398 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401 				NUM_BANKS(ADDR_SURF_8_BANK));
3402 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405 				NUM_BANKS(ADDR_SURF_8_BANK));
3406 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3407 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3408 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3409 				NUM_BANKS(ADDR_SURF_16_BANK));
3410 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3411 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3412 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413 				NUM_BANKS(ADDR_SURF_16_BANK));
3414 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3415 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3416 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417 				 NUM_BANKS(ADDR_SURF_16_BANK));
3418 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3419 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3420 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421 				 NUM_BANKS(ADDR_SURF_16_BANK));
3422 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3423 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3424 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425 				 NUM_BANKS(ADDR_SURF_16_BANK));
3426 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3428 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429 				 NUM_BANKS(ADDR_SURF_16_BANK));
3430 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3433 				 NUM_BANKS(ADDR_SURF_8_BANK));
3434 
3435 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3436 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3437 			    reg_offset != 23)
3438 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3439 
3440 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3441 			if (reg_offset != 7)
3442 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3443 
3444 		break;
3445 	}
3446 }
3447 
3448 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3449 				  u32 se_num, u32 sh_num, u32 instance)
3450 {
3451 	u32 data;
3452 
3453 	if (instance == 0xffffffff)
3454 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3455 	else
3456 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3457 
3458 	if (se_num == 0xffffffff)
3459 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3460 	else
3461 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3462 
3463 	if (sh_num == 0xffffffff)
3464 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3465 	else
3466 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3467 
3468 	WREG32(mmGRBM_GFX_INDEX, data);
3469 }
3470 
3471 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3472 				  u32 me, u32 pipe, u32 q, u32 vm)
3473 {
3474 	vi_srbm_select(adev, me, pipe, q, vm);
3475 }
3476 
3477 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3478 {
3479 	u32 data, mask;
3480 
3481 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3482 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3483 
3484 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3485 
3486 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3487 					 adev->gfx.config.max_sh_per_se);
3488 
3489 	return (~data) & mask;
3490 }
3491 
3492 static void
3493 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3494 {
3495 	switch (adev->asic_type) {
3496 	case CHIP_FIJI:
3497 	case CHIP_VEGAM:
3498 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3499 			  RB_XSEL2(1) | PKR_MAP(2) |
3500 			  PKR_XSEL(1) | PKR_YSEL(1) |
3501 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3502 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3503 			   SE_PAIR_YSEL(2);
3504 		break;
3505 	case CHIP_TONGA:
3506 	case CHIP_POLARIS10:
3507 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3508 			  SE_XSEL(1) | SE_YSEL(1);
3509 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3510 			   SE_PAIR_YSEL(2);
3511 		break;
3512 	case CHIP_TOPAZ:
3513 	case CHIP_CARRIZO:
3514 		*rconf |= RB_MAP_PKR0(2);
3515 		*rconf1 |= 0x0;
3516 		break;
3517 	case CHIP_POLARIS11:
3518 	case CHIP_POLARIS12:
3519 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3520 			  SE_XSEL(1) | SE_YSEL(1);
3521 		*rconf1 |= 0x0;
3522 		break;
3523 	case CHIP_STONEY:
3524 		*rconf |= 0x0;
3525 		*rconf1 |= 0x0;
3526 		break;
3527 	default:
3528 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3529 		break;
3530 	}
3531 }
3532 
3533 static void
3534 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3535 					u32 raster_config, u32 raster_config_1,
3536 					unsigned rb_mask, unsigned num_rb)
3537 {
3538 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3539 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3540 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3541 	unsigned rb_per_se = num_rb / num_se;
3542 	unsigned se_mask[4];
3543 	unsigned se;
3544 
3545 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3546 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3547 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3548 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3549 
3550 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3551 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3552 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3553 
3554 	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3555 			     (!se_mask[2] && !se_mask[3]))) {
3556 		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3557 
3558 		if (!se_mask[0] && !se_mask[1]) {
3559 			raster_config_1 |=
3560 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3561 		} else {
3562 			raster_config_1 |=
3563 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3564 		}
3565 	}
3566 
3567 	for (se = 0; se < num_se; se++) {
3568 		unsigned raster_config_se = raster_config;
3569 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3570 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3571 		int idx = (se / 2) * 2;
3572 
3573 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3574 			raster_config_se &= ~SE_MAP_MASK;
3575 
3576 			if (!se_mask[idx]) {
3577 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3578 			} else {
3579 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3580 			}
3581 		}
3582 
3583 		pkr0_mask &= rb_mask;
3584 		pkr1_mask &= rb_mask;
3585 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3586 			raster_config_se &= ~PKR_MAP_MASK;
3587 
3588 			if (!pkr0_mask) {
3589 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3590 			} else {
3591 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3592 			}
3593 		}
3594 
3595 		if (rb_per_se >= 2) {
3596 			unsigned rb0_mask = 1 << (se * rb_per_se);
3597 			unsigned rb1_mask = rb0_mask << 1;
3598 
3599 			rb0_mask &= rb_mask;
3600 			rb1_mask &= rb_mask;
3601 			if (!rb0_mask || !rb1_mask) {
3602 				raster_config_se &= ~RB_MAP_PKR0_MASK;
3603 
3604 				if (!rb0_mask) {
3605 					raster_config_se |=
3606 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3607 				} else {
3608 					raster_config_se |=
3609 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3610 				}
3611 			}
3612 
3613 			if (rb_per_se > 2) {
3614 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3615 				rb1_mask = rb0_mask << 1;
3616 				rb0_mask &= rb_mask;
3617 				rb1_mask &= rb_mask;
3618 				if (!rb0_mask || !rb1_mask) {
3619 					raster_config_se &= ~RB_MAP_PKR1_MASK;
3620 
3621 					if (!rb0_mask) {
3622 						raster_config_se |=
3623 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3624 					} else {
3625 						raster_config_se |=
3626 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3627 					}
3628 				}
3629 			}
3630 		}
3631 
3632 		/* GRBM_GFX_INDEX has a different offset on VI */
3633 		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3634 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3635 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3636 	}
3637 
3638 	/* GRBM_GFX_INDEX has a different offset on VI */
3639 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3640 }
3641 
3642 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3643 {
3644 	int i, j;
3645 	u32 data;
3646 	u32 raster_config = 0, raster_config_1 = 0;
3647 	u32 active_rbs = 0;
3648 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3649 					adev->gfx.config.max_sh_per_se;
3650 	unsigned num_rb_pipes;
3651 
3652 	mutex_lock(&adev->grbm_idx_mutex);
3653 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3654 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3655 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3656 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3657 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3658 					       rb_bitmap_width_per_sh);
3659 		}
3660 	}
3661 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3662 
3663 	adev->gfx.config.backend_enable_mask = active_rbs;
3664 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3665 
3666 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3667 			     adev->gfx.config.max_shader_engines, 16);
3668 
3669 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3670 
3671 	if (!adev->gfx.config.backend_enable_mask ||
3672 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3673 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3674 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3675 	} else {
3676 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3677 							adev->gfx.config.backend_enable_mask,
3678 							num_rb_pipes);
3679 	}
3680 
3681 	/* cache the values for userspace */
3682 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3683 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3684 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3685 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3686 				RREG32(mmCC_RB_BACKEND_DISABLE);
3687 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3688 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3689 			adev->gfx.config.rb_config[i][j].raster_config =
3690 				RREG32(mmPA_SC_RASTER_CONFIG);
3691 			adev->gfx.config.rb_config[i][j].raster_config_1 =
3692 				RREG32(mmPA_SC_RASTER_CONFIG_1);
3693 		}
3694 	}
3695 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3696 	mutex_unlock(&adev->grbm_idx_mutex);
3697 }
3698 
3699 /**
3700  * gfx_v8_0_init_compute_vmid - gart enable
3701  *
3702  * @adev: amdgpu_device pointer
3703  *
3704  * Initialize compute vmid sh_mem registers
3705  *
3706  */
3707 #define DEFAULT_SH_MEM_BASES	(0x6000)
3708 #define FIRST_COMPUTE_VMID	(8)
3709 #define LAST_COMPUTE_VMID	(16)
3710 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3711 {
3712 	int i;
3713 	uint32_t sh_mem_config;
3714 	uint32_t sh_mem_bases;
3715 
3716 	/*
3717 	 * Configure apertures:
3718 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3719 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3720 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3721 	 */
3722 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3723 
3724 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3725 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3726 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3727 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3728 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3729 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3730 
3731 	mutex_lock(&adev->srbm_mutex);
3732 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3733 		vi_srbm_select(adev, 0, 0, 0, i);
3734 		/* CP and shaders */
3735 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3736 		WREG32(mmSH_MEM_APE1_BASE, 1);
3737 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3738 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3739 	}
3740 	vi_srbm_select(adev, 0, 0, 0, 0);
3741 	mutex_unlock(&adev->srbm_mutex);
3742 
3743 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
3744 	   acccess. These should be enabled by FW for target VMIDs. */
3745 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3746 		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3747 		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3748 		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3749 		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3750 	}
3751 }
3752 
3753 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3754 {
3755 	switch (adev->asic_type) {
3756 	default:
3757 		adev->gfx.config.double_offchip_lds_buf = 1;
3758 		break;
3759 	case CHIP_CARRIZO:
3760 	case CHIP_STONEY:
3761 		adev->gfx.config.double_offchip_lds_buf = 0;
3762 		break;
3763 	}
3764 }
3765 
3766 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3767 {
3768 	u32 tmp, sh_static_mem_cfg;
3769 	int i;
3770 
3771 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3772 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3773 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3774 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3775 
3776 	gfx_v8_0_tiling_mode_table_init(adev);
3777 	gfx_v8_0_setup_rb(adev);
3778 	gfx_v8_0_get_cu_info(adev);
3779 	gfx_v8_0_config_init(adev);
3780 
3781 	/* XXX SH_MEM regs */
3782 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3783 	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3784 				   SWIZZLE_ENABLE, 1);
3785 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3786 				   ELEMENT_SIZE, 1);
3787 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3788 				   INDEX_STRIDE, 3);
3789 	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3790 
3791 	mutex_lock(&adev->srbm_mutex);
3792 	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3793 		vi_srbm_select(adev, 0, 0, 0, i);
3794 		/* CP and shaders */
3795 		if (i == 0) {
3796 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3797 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3798 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3799 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3800 			WREG32(mmSH_MEM_CONFIG, tmp);
3801 			WREG32(mmSH_MEM_BASES, 0);
3802 		} else {
3803 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3804 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3805 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3806 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3807 			WREG32(mmSH_MEM_CONFIG, tmp);
3808 			tmp = adev->gmc.shared_aperture_start >> 48;
3809 			WREG32(mmSH_MEM_BASES, tmp);
3810 		}
3811 
3812 		WREG32(mmSH_MEM_APE1_BASE, 1);
3813 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3814 	}
3815 	vi_srbm_select(adev, 0, 0, 0, 0);
3816 	mutex_unlock(&adev->srbm_mutex);
3817 
3818 	gfx_v8_0_init_compute_vmid(adev);
3819 
3820 	mutex_lock(&adev->grbm_idx_mutex);
3821 	/*
3822 	 * making sure that the following register writes will be broadcasted
3823 	 * to all the shaders
3824 	 */
3825 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3826 
3827 	WREG32(mmPA_SC_FIFO_SIZE,
3828 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3829 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3830 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3831 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3832 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3833 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3834 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3835 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3836 
3837 	tmp = RREG32(mmSPI_ARB_PRIORITY);
3838 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3839 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3840 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3841 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3842 	WREG32(mmSPI_ARB_PRIORITY, tmp);
3843 
3844 	mutex_unlock(&adev->grbm_idx_mutex);
3845 
3846 }
3847 
3848 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3849 {
3850 	u32 i, j, k;
3851 	u32 mask;
3852 
3853 	mutex_lock(&adev->grbm_idx_mutex);
3854 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3855 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3856 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3857 			for (k = 0; k < adev->usec_timeout; k++) {
3858 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3859 					break;
3860 				udelay(1);
3861 			}
3862 			if (k == adev->usec_timeout) {
3863 				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3864 						      0xffffffff, 0xffffffff);
3865 				mutex_unlock(&adev->grbm_idx_mutex);
3866 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3867 					 i, j);
3868 				return;
3869 			}
3870 		}
3871 	}
3872 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3873 	mutex_unlock(&adev->grbm_idx_mutex);
3874 
3875 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3876 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3877 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3878 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3879 	for (k = 0; k < adev->usec_timeout; k++) {
3880 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3881 			break;
3882 		udelay(1);
3883 	}
3884 }
3885 
3886 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3887 					       bool enable)
3888 {
3889 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3890 
3891 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3892 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3893 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3894 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3895 
3896 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3897 }
3898 
3899 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3900 {
3901 	/* csib */
3902 	WREG32(mmRLC_CSIB_ADDR_HI,
3903 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3904 	WREG32(mmRLC_CSIB_ADDR_LO,
3905 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3906 	WREG32(mmRLC_CSIB_LENGTH,
3907 			adev->gfx.rlc.clear_state_size);
3908 }
3909 
/*
 * Walk the indirect part of a register list format buffer in place.
 *
 * @register_list_format: list buffer (modified in place)
 * @ind_offset: dword index at which the walk starts
 * @list_size: number of dwords in @register_list_format
 * @unique_indices: table that collects each distinct index value found
 * @indices_count: in/out number of used slots in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: table that collects the start offset of each group
 * @offset_count: in/out number of used slots in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * A group starts at @ind_offset and after every 0xFFFFFFFF marker; its
 * start offset is appended to @ind_start_offsets.  For every other element
 * the walk skips two dwords and treats the third one as an index value:
 * the value is looked up in (or appended to) @unique_indices and the list
 * dword is overwritten with its position in that table.
 *
 * BUG_ON() fires as soon as either table's count reaches its capacity.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/*
		 * A truncated final entry would otherwise make the code
		 * below read and write past the end of the list buffer.
		 */
		if (ind_offset >= list_size)
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value by its slot in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3959 
3960 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3961 {
3962 	int i, temp, data;
3963 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3964 	int indices_count = 0;
3965 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3966 	int offset_count = 0;
3967 
3968 	int list_size;
3969 	unsigned int *register_list_format =
3970 		kmemdup(adev->gfx.rlc.register_list_format,
3971 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3972 	if (!register_list_format)
3973 		return -ENOMEM;
3974 
3975 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3976 				RLC_FormatDirectRegListLength,
3977 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3978 				unique_indices,
3979 				&indices_count,
3980 				ARRAY_SIZE(unique_indices),
3981 				indirect_start_offsets,
3982 				&offset_count,
3983 				ARRAY_SIZE(indirect_start_offsets));
3984 
3985 	/* save and restore list */
3986 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3987 
3988 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3989 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3990 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3991 
3992 	/* indirect list */
3993 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3994 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3995 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3996 
3997 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3998 	list_size = list_size >> 1;
3999 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4000 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4001 
4002 	/* starting offsets starts */
4003 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
4004 		adev->gfx.rlc.starting_offsets_start);
4005 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4006 		WREG32(mmRLC_GPM_SCRATCH_DATA,
4007 				indirect_start_offsets[i]);
4008 
4009 	/* unique indices */
4010 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4011 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4012 	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4013 		if (unique_indices[i] != 0) {
4014 			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4015 			WREG32(data + i, unique_indices[i] >> 20);
4016 		}
4017 	}
4018 	kfree(register_list_format);
4019 
4020 	return 0;
4021 }
4022 
4023 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4024 {
4025 	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4026 }
4027 
4028 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4029 {
4030 	uint32_t data;
4031 
4032 	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4033 
4034 	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4035 	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4036 	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4037 	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4038 	WREG32(mmRLC_PG_DELAY, data);
4039 
4040 	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4041 	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4042 
4043 }
4044 
4045 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4046 						bool enable)
4047 {
4048 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4049 }
4050 
4051 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4052 						  bool enable)
4053 {
4054 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4055 }
4056 
4057 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4058 {
4059 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4060 }
4061 
4062 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4063 {
4064 	if ((adev->asic_type == CHIP_CARRIZO) ||
4065 	    (adev->asic_type == CHIP_STONEY)) {
4066 		gfx_v8_0_init_csb(adev);
4067 		gfx_v8_0_init_save_restore_list(adev);
4068 		gfx_v8_0_enable_save_restore_machine(adev);
4069 		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4070 		gfx_v8_0_init_power_gating(adev);
4071 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4072 	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4073 		   (adev->asic_type == CHIP_POLARIS12) ||
4074 		   (adev->asic_type == CHIP_VEGAM)) {
4075 		gfx_v8_0_init_csb(adev);
4076 		gfx_v8_0_init_save_restore_list(adev);
4077 		gfx_v8_0_enable_save_restore_machine(adev);
4078 		gfx_v8_0_init_power_gating(adev);
4079 	}
4080 
4081 }
4082 
4083 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4084 {
4085 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4086 
4087 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4088 	gfx_v8_0_wait_for_rlc_serdes(adev);
4089 }
4090 
4091 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4092 {
4093 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4094 	udelay(50);
4095 
4096 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4097 	udelay(50);
4098 }
4099 
4100 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4101 {
4102 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4103 
4104 	/* carrizo do enable cp interrupt after cp inited */
4105 	if (!(adev->flags & AMD_IS_APU))
4106 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4107 
4108 	udelay(50);
4109 }
4110 
4111 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4112 {
4113 	if (amdgpu_sriov_vf(adev)) {
4114 		gfx_v8_0_init_csb(adev);
4115 		return 0;
4116 	}
4117 
4118 	adev->gfx.rlc.funcs->stop(adev);
4119 	adev->gfx.rlc.funcs->reset(adev);
4120 	gfx_v8_0_init_pg(adev);
4121 	adev->gfx.rlc.funcs->start(adev);
4122 
4123 	return 0;
4124 }
4125 
4126 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4127 {
4128 	int i;
4129 	u32 tmp = RREG32(mmCP_ME_CNTL);
4130 
4131 	if (enable) {
4132 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4133 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4134 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4135 	} else {
4136 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4137 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4138 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4139 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4140 			adev->gfx.gfx_ring[i].sched.ready = false;
4141 	}
4142 	WREG32(mmCP_ME_CNTL, tmp);
4143 	udelay(50);
4144 }
4145 
4146 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4147 {
4148 	u32 count = 0;
4149 	const struct cs_section_def *sect = NULL;
4150 	const struct cs_extent_def *ext = NULL;
4151 
4152 	/* begin clear state */
4153 	count += 2;
4154 	/* context control state */
4155 	count += 3;
4156 
4157 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4158 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4159 			if (sect->id == SECT_CONTEXT)
4160 				count += 2 + ext->reg_count;
4161 			else
4162 				return 0;
4163 		}
4164 	}
4165 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4166 	count += 4;
4167 	/* end clear state */
4168 	count += 2;
4169 	/* clear state */
4170 	count += 2;
4171 
4172 	return count;
4173 }
4174 
4175 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4176 {
4177 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4178 	const struct cs_section_def *sect = NULL;
4179 	const struct cs_extent_def *ext = NULL;
4180 	int r, i;
4181 
4182 	/* init the CP */
4183 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4184 	WREG32(mmCP_ENDIAN_SWAP, 0);
4185 	WREG32(mmCP_DEVICE_ID, 1);
4186 
4187 	gfx_v8_0_cp_gfx_enable(adev, true);
4188 
4189 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4190 	if (r) {
4191 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4192 		return r;
4193 	}
4194 
4195 	/* clear state buffer */
4196 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4197 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4198 
4199 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4200 	amdgpu_ring_write(ring, 0x80000000);
4201 	amdgpu_ring_write(ring, 0x80000000);
4202 
4203 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4204 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4205 			if (sect->id == SECT_CONTEXT) {
4206 				amdgpu_ring_write(ring,
4207 				       PACKET3(PACKET3_SET_CONTEXT_REG,
4208 					       ext->reg_count));
4209 				amdgpu_ring_write(ring,
4210 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4211 				for (i = 0; i < ext->reg_count; i++)
4212 					amdgpu_ring_write(ring, ext->extent[i]);
4213 			}
4214 		}
4215 	}
4216 
4217 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4218 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4219 	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4220 	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4221 
4222 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4223 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4224 
4225 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4226 	amdgpu_ring_write(ring, 0);
4227 
4228 	/* init the CE partitions */
4229 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4230 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4231 	amdgpu_ring_write(ring, 0x8000);
4232 	amdgpu_ring_write(ring, 0x8000);
4233 
4234 	amdgpu_ring_commit(ring);
4235 
4236 	return 0;
4237 }
4238 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4239 {
4240 	u32 tmp;
4241 	/* no gfx doorbells on iceland */
4242 	if (adev->asic_type == CHIP_TOPAZ)
4243 		return;
4244 
4245 	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4246 
4247 	if (ring->use_doorbell) {
4248 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4249 				DOORBELL_OFFSET, ring->doorbell_index);
4250 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4251 						DOORBELL_HIT, 0);
4252 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4253 					    DOORBELL_EN, 1);
4254 	} else {
4255 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4256 	}
4257 
4258 	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4259 
4260 	if (adev->flags & AMD_IS_APU)
4261 		return;
4262 
4263 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4264 					DOORBELL_RANGE_LOWER,
4265 					adev->doorbell_index.gfx_ring0);
4266 	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4267 
4268 	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4269 		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4270 }
4271 
4272 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4273 {
4274 	struct amdgpu_ring *ring;
4275 	u32 tmp;
4276 	u32 rb_bufsz;
4277 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4278 
4279 	/* Set the write pointer delay */
4280 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4281 
4282 	/* set the RB to use vmid 0 */
4283 	WREG32(mmCP_RB_VMID, 0);
4284 
4285 	/* Set ring buffer size */
4286 	ring = &adev->gfx.gfx_ring[0];
4287 	rb_bufsz = order_base_2(ring->ring_size / 8);
4288 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4289 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4290 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4291 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4292 #ifdef __BIG_ENDIAN
4293 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4294 #endif
4295 	WREG32(mmCP_RB0_CNTL, tmp);
4296 
4297 	/* Initialize the ring buffer's read and write pointers */
4298 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4299 	ring->wptr = 0;
4300 	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4301 
4302 	/* set the wb address wether it's enabled or not */
4303 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4304 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4305 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4306 
4307 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4308 	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4309 	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4310 	mdelay(1);
4311 	WREG32(mmCP_RB0_CNTL, tmp);
4312 
4313 	rb_addr = ring->gpu_addr >> 8;
4314 	WREG32(mmCP_RB0_BASE, rb_addr);
4315 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4316 
4317 	gfx_v8_0_set_cpg_door_bell(adev, ring);
4318 	/* start the ring */
4319 	amdgpu_ring_clear_ring(ring);
4320 	gfx_v8_0_cp_gfx_start(adev);
4321 	ring->sched.ready = true;
4322 
4323 	return 0;
4324 }
4325 
4326 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4327 {
4328 	int i;
4329 
4330 	if (enable) {
4331 		WREG32(mmCP_MEC_CNTL, 0);
4332 	} else {
4333 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4334 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4335 			adev->gfx.compute_ring[i].sched.ready = false;
4336 		adev->gfx.kiq.ring.sched.ready = false;
4337 	}
4338 	udelay(50);
4339 }
4340 
4341 /* KIQ functions */
4342 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4343 {
4344 	uint32_t tmp;
4345 	struct amdgpu_device *adev = ring->adev;
4346 
4347 	/* tell RLC which is KIQ queue */
4348 	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4349 	tmp &= 0xffffff00;
4350 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4351 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4352 	tmp |= 0x80;
4353 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4354 }
4355 
4356 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4357 {
4358 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4359 	uint64_t queue_mask = 0;
4360 	int r, i;
4361 
4362 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4363 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4364 			continue;
4365 
4366 		/* This situation may be hit in the future if a new HW
4367 		 * generation exposes more than 64 queues. If so, the
4368 		 * definition of queue_mask needs updating */
4369 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4370 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4371 			break;
4372 		}
4373 
4374 		queue_mask |= (1ull << i);
4375 	}
4376 
4377 	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4378 	if (r) {
4379 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4380 		return r;
4381 	}
4382 	/* set resources */
4383 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4384 	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4385 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4386 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4387 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4388 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4389 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4390 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4391 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4392 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4393 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4394 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4395 
4396 		/* map queues */
4397 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4398 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4399 		amdgpu_ring_write(kiq_ring,
4400 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4401 		amdgpu_ring_write(kiq_ring,
4402 				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4403 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4404 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4405 				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4406 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4407 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4408 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4409 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4410 	}
4411 
4412 	amdgpu_ring_commit(kiq_ring);
4413 
4414 	return 0;
4415 }
4416 
4417 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4418 {
4419 	int i, r = 0;
4420 
4421 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4422 		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4423 		for (i = 0; i < adev->usec_timeout; i++) {
4424 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4425 				break;
4426 			udelay(1);
4427 		}
4428 		if (i == adev->usec_timeout)
4429 			r = -ETIMEDOUT;
4430 	}
4431 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4432 	WREG32(mmCP_HQD_PQ_RPTR, 0);
4433 	WREG32(mmCP_HQD_PQ_WPTR, 0);
4434 
4435 	return r;
4436 }
4437 
4438 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4439 {
4440 	struct amdgpu_device *adev = ring->adev;
4441 	struct vi_mqd *mqd = ring->mqd_ptr;
4442 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4443 	uint32_t tmp;
4444 
4445 	mqd->header = 0xC0310800;
4446 	mqd->compute_pipelinestat_enable = 0x00000001;
4447 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4448 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4449 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4450 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4451 	mqd->compute_misc_reserved = 0x00000003;
4452 	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4453 						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4454 	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4455 						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4456 	eop_base_addr = ring->eop_gpu_addr >> 8;
4457 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4458 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4459 
4460 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4461 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4462 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4463 			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4464 
4465 	mqd->cp_hqd_eop_control = tmp;
4466 
4467 	/* enable doorbell? */
4468 	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4469 			    CP_HQD_PQ_DOORBELL_CONTROL,
4470 			    DOORBELL_EN,
4471 			    ring->use_doorbell ? 1 : 0);
4472 
4473 	mqd->cp_hqd_pq_doorbell_control = tmp;
4474 
4475 	/* set the pointer to the MQD */
4476 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4477 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4478 
4479 	/* set MQD vmid to 0 */
4480 	tmp = RREG32(mmCP_MQD_CONTROL);
4481 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4482 	mqd->cp_mqd_control = tmp;
4483 
4484 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4485 	hqd_gpu_addr = ring->gpu_addr >> 8;
4486 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4487 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4488 
4489 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4490 	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4491 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4492 			    (order_base_2(ring->ring_size / 4) - 1));
4493 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4494 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4495 #ifdef __BIG_ENDIAN
4496 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4497 #endif
4498 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4499 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4500 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4501 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4502 	mqd->cp_hqd_pq_control = tmp;
4503 
4504 	/* set the wb address whether it's enabled or not */
4505 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4506 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4507 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4508 		upper_32_bits(wb_gpu_addr) & 0xffff;
4509 
4510 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4511 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4512 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4513 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4514 
4515 	tmp = 0;
4516 	/* enable the doorbell if requested */
4517 	if (ring->use_doorbell) {
4518 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4519 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4520 				DOORBELL_OFFSET, ring->doorbell_index);
4521 
4522 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4523 					 DOORBELL_EN, 1);
4524 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4525 					 DOORBELL_SOURCE, 0);
4526 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4527 					 DOORBELL_HIT, 0);
4528 	}
4529 
4530 	mqd->cp_hqd_pq_doorbell_control = tmp;
4531 
4532 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4533 	ring->wptr = 0;
4534 	mqd->cp_hqd_pq_wptr = ring->wptr;
4535 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4536 
4537 	/* set the vmid for the queue */
4538 	mqd->cp_hqd_vmid = 0;
4539 
4540 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4541 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4542 	mqd->cp_hqd_persistent_state = tmp;
4543 
4544 	/* set MTYPE */
4545 	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4546 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4547 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4548 	mqd->cp_hqd_ib_control = tmp;
4549 
4550 	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4551 	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4552 	mqd->cp_hqd_iq_timer = tmp;
4553 
4554 	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4555 	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4556 	mqd->cp_hqd_ctx_save_control = tmp;
4557 
4558 	/* defaults */
4559 	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4560 	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4561 	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4562 	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4563 	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4564 	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4565 	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4566 	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4567 	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4568 	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4569 	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4570 	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4571 	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4572 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4573 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4574 
4575 	/* activate the queue */
4576 	mqd->cp_hqd_active = 1;
4577 
4578 	return 0;
4579 }
4580 
4581 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4582 			struct vi_mqd *mqd)
4583 {
4584 	uint32_t mqd_reg;
4585 	uint32_t *mqd_data;
4586 
4587 	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4588 	mqd_data = &mqd->cp_mqd_base_addr_lo;
4589 
4590 	/* disable wptr polling */
4591 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4592 
4593 	/* program all HQD registers */
4594 	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4595 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4596 
4597 	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4598 	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4599 	 * on ASICs that do not support context-save.
4600 	 * EOP writes/reads can start anywhere in the ring.
4601 	 */
4602 	if (adev->asic_type != CHIP_TONGA) {
4603 		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4604 		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4605 		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4606 	}
4607 
4608 	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4609 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4610 
4611 	/* activate the HQD */
4612 	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4613 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4614 
4615 	return 0;
4616 }
4617 
4618 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4619 {
4620 	struct amdgpu_device *adev = ring->adev;
4621 	struct vi_mqd *mqd = ring->mqd_ptr;
4622 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4623 
4624 	gfx_v8_0_kiq_setting(ring);
4625 
4626 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
4627 		/* reset MQD to a clean status */
4628 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4629 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4630 
4631 		/* reset ring buffer */
4632 		ring->wptr = 0;
4633 		amdgpu_ring_clear_ring(ring);
4634 		mutex_lock(&adev->srbm_mutex);
4635 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4636 		gfx_v8_0_mqd_commit(adev, mqd);
4637 		vi_srbm_select(adev, 0, 0, 0, 0);
4638 		mutex_unlock(&adev->srbm_mutex);
4639 	} else {
4640 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4641 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4642 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4643 		mutex_lock(&adev->srbm_mutex);
4644 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4645 		gfx_v8_0_mqd_init(ring);
4646 		gfx_v8_0_mqd_commit(adev, mqd);
4647 		vi_srbm_select(adev, 0, 0, 0, 0);
4648 		mutex_unlock(&adev->srbm_mutex);
4649 
4650 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4651 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4652 	}
4653 
4654 	return 0;
4655 }
4656 
4657 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4658 {
4659 	struct amdgpu_device *adev = ring->adev;
4660 	struct vi_mqd *mqd = ring->mqd_ptr;
4661 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4662 
4663 	if (!adev->in_gpu_reset && !adev->in_suspend) {
4664 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4665 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4666 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4667 		mutex_lock(&adev->srbm_mutex);
4668 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4669 		gfx_v8_0_mqd_init(ring);
4670 		vi_srbm_select(adev, 0, 0, 0, 0);
4671 		mutex_unlock(&adev->srbm_mutex);
4672 
4673 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4674 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4675 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4676 		/* reset MQD to a clean status */
4677 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4678 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4679 		/* reset ring buffer */
4680 		ring->wptr = 0;
4681 		amdgpu_ring_clear_ring(ring);
4682 	} else {
4683 		amdgpu_ring_clear_ring(ring);
4684 	}
4685 	return 0;
4686 }
4687 
4688 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4689 {
4690 	if (adev->asic_type > CHIP_TONGA) {
4691 		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4692 		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4693 	}
4694 	/* enable doorbells */
4695 	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4696 }
4697 
4698 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4699 {
4700 	struct amdgpu_ring *ring;
4701 	int r;
4702 
4703 	ring = &adev->gfx.kiq.ring;
4704 
4705 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4706 	if (unlikely(r != 0))
4707 		return r;
4708 
4709 	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4710 	if (unlikely(r != 0))
4711 		return r;
4712 
4713 	gfx_v8_0_kiq_init_queue(ring);
4714 	amdgpu_bo_kunmap(ring->mqd_obj);
4715 	ring->mqd_ptr = NULL;
4716 	amdgpu_bo_unreserve(ring->mqd_obj);
4717 	ring->sched.ready = true;
4718 	return 0;
4719 }
4720 
4721 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4722 {
4723 	struct amdgpu_ring *ring = NULL;
4724 	int r = 0, i;
4725 
4726 	gfx_v8_0_cp_compute_enable(adev, true);
4727 
4728 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4729 		ring = &adev->gfx.compute_ring[i];
4730 
4731 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4732 		if (unlikely(r != 0))
4733 			goto done;
4734 		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4735 		if (!r) {
4736 			r = gfx_v8_0_kcq_init_queue(ring);
4737 			amdgpu_bo_kunmap(ring->mqd_obj);
4738 			ring->mqd_ptr = NULL;
4739 		}
4740 		amdgpu_bo_unreserve(ring->mqd_obj);
4741 		if (r)
4742 			goto done;
4743 	}
4744 
4745 	gfx_v8_0_set_mec_doorbell_range(adev);
4746 
4747 	r = gfx_v8_0_kiq_kcq_enable(adev);
4748 	if (r)
4749 		goto done;
4750 
4751 done:
4752 	return r;
4753 }
4754 
4755 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4756 {
4757 	int r, i;
4758 	struct amdgpu_ring *ring;
4759 
4760 	/* collect all the ring_tests here, gfx, kiq, compute */
4761 	ring = &adev->gfx.gfx_ring[0];
4762 	r = amdgpu_ring_test_helper(ring);
4763 	if (r)
4764 		return r;
4765 
4766 	ring = &adev->gfx.kiq.ring;
4767 	r = amdgpu_ring_test_helper(ring);
4768 	if (r)
4769 		return r;
4770 
4771 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4772 		ring = &adev->gfx.compute_ring[i];
4773 		amdgpu_ring_test_helper(ring);
4774 	}
4775 
4776 	return 0;
4777 }
4778 
4779 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4780 {
4781 	int r;
4782 
4783 	if (!(adev->flags & AMD_IS_APU))
4784 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4785 
4786 	r = gfx_v8_0_kiq_resume(adev);
4787 	if (r)
4788 		return r;
4789 
4790 	r = gfx_v8_0_cp_gfx_resume(adev);
4791 	if (r)
4792 		return r;
4793 
4794 	r = gfx_v8_0_kcq_resume(adev);
4795 	if (r)
4796 		return r;
4797 
4798 	r = gfx_v8_0_cp_test_all_rings(adev);
4799 	if (r)
4800 		return r;
4801 
4802 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4803 
4804 	return 0;
4805 }
4806 
4807 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4808 {
4809 	gfx_v8_0_cp_gfx_enable(adev, enable);
4810 	gfx_v8_0_cp_compute_enable(adev, enable);
4811 }
4812 
4813 static int gfx_v8_0_hw_init(void *handle)
4814 {
4815 	int r;
4816 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4817 
4818 	gfx_v8_0_init_golden_registers(adev);
4819 	gfx_v8_0_constants_init(adev);
4820 
4821 	r = gfx_v8_0_csb_vram_pin(adev);
4822 	if (r)
4823 		return r;
4824 
4825 	r = adev->gfx.rlc.funcs->resume(adev);
4826 	if (r)
4827 		return r;
4828 
4829 	r = gfx_v8_0_cp_resume(adev);
4830 
4831 	return r;
4832 }
4833 
4834 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4835 {
4836 	int r, i;
4837 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4838 
4839 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4840 	if (r)
4841 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4842 
4843 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4844 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4845 
4846 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4847 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4848 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4849 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4850 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4851 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4852 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4853 		amdgpu_ring_write(kiq_ring, 0);
4854 		amdgpu_ring_write(kiq_ring, 0);
4855 		amdgpu_ring_write(kiq_ring, 0);
4856 	}
4857 	r = amdgpu_ring_test_helper(kiq_ring);
4858 	if (r)
4859 		DRM_ERROR("KCQ disable failed\n");
4860 
4861 	return r;
4862 }
4863 
4864 static bool gfx_v8_0_is_idle(void *handle)
4865 {
4866 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4867 
4868 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4869 		|| RREG32(mmGRBM_STATUS2) != 0x8)
4870 		return false;
4871 	else
4872 		return true;
4873 }
4874 
4875 static bool gfx_v8_0_rlc_is_idle(void *handle)
4876 {
4877 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4878 
4879 	if (RREG32(mmGRBM_STATUS2) != 0x8)
4880 		return false;
4881 	else
4882 		return true;
4883 }
4884 
4885 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4886 {
4887 	unsigned int i;
4888 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4889 
4890 	for (i = 0; i < adev->usec_timeout; i++) {
4891 		if (gfx_v8_0_rlc_is_idle(handle))
4892 			return 0;
4893 
4894 		udelay(1);
4895 	}
4896 	return -ETIMEDOUT;
4897 }
4898 
4899 static int gfx_v8_0_wait_for_idle(void *handle)
4900 {
4901 	unsigned int i;
4902 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4903 
4904 	for (i = 0; i < adev->usec_timeout; i++) {
4905 		if (gfx_v8_0_is_idle(handle))
4906 			return 0;
4907 
4908 		udelay(1);
4909 	}
4910 	return -ETIMEDOUT;
4911 }
4912 
4913 static int gfx_v8_0_hw_fini(void *handle)
4914 {
4915 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4916 
4917 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4918 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4919 
4920 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4921 
4922 	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4923 
4924 	/* disable KCQ to avoid CPC touch memory not valid anymore */
4925 	gfx_v8_0_kcq_disable(adev);
4926 
4927 	if (amdgpu_sriov_vf(adev)) {
4928 		pr_debug("For SRIOV client, shouldn't do anything.\n");
4929 		return 0;
4930 	}
4931 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4932 	if (!gfx_v8_0_wait_for_idle(adev))
4933 		gfx_v8_0_cp_enable(adev, false);
4934 	else
4935 		pr_err("cp is busy, skip halt cp\n");
4936 	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4937 		adev->gfx.rlc.funcs->stop(adev);
4938 	else
4939 		pr_err("rlc is busy, skip halt rlc\n");
4940 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4941 
4942 	gfx_v8_0_csb_vram_unpin(adev);
4943 
4944 	return 0;
4945 }
4946 
/* Suspend is implemented as a plain hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	int r = gfx_v8_0_hw_fini(handle);

	return r;
}
4951 
/* Resume is implemented as a plain hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	int r = gfx_v8_0_hw_init(handle);

	return r;
}
4956 
4957 static bool gfx_v8_0_check_soft_reset(void *handle)
4958 {
4959 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4960 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4961 	u32 tmp;
4962 
4963 	/* GRBM_STATUS */
4964 	tmp = RREG32(mmGRBM_STATUS);
4965 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4966 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4967 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4968 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4969 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4970 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4971 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4972 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4973 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4974 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4975 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4976 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4977 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4978 	}
4979 
4980 	/* GRBM_STATUS2 */
4981 	tmp = RREG32(mmGRBM_STATUS2);
4982 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4983 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4984 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4985 
4986 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4987 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4988 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4989 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4990 						SOFT_RESET_CPF, 1);
4991 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4992 						SOFT_RESET_CPC, 1);
4993 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4994 						SOFT_RESET_CPG, 1);
4995 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4996 						SOFT_RESET_GRBM, 1);
4997 	}
4998 
4999 	/* SRBM_STATUS */
5000 	tmp = RREG32(mmSRBM_STATUS);
5001 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5002 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5003 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5004 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5005 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5006 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5007 
5008 	if (grbm_soft_reset || srbm_soft_reset) {
5009 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5010 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5011 		return true;
5012 	} else {
5013 		adev->gfx.grbm_soft_reset = 0;
5014 		adev->gfx.srbm_soft_reset = 0;
5015 		return false;
5016 	}
5017 }
5018 
5019 static int gfx_v8_0_pre_soft_reset(void *handle)
5020 {
5021 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5022 	u32 grbm_soft_reset = 0;
5023 
5024 	if ((!adev->gfx.grbm_soft_reset) &&
5025 	    (!adev->gfx.srbm_soft_reset))
5026 		return 0;
5027 
5028 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5029 
5030 	/* stop the rlc */
5031 	adev->gfx.rlc.funcs->stop(adev);
5032 
5033 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5034 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5035 		/* Disable GFX parsing/prefetching */
5036 		gfx_v8_0_cp_gfx_enable(adev, false);
5037 
5038 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5039 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5040 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5041 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5042 		int i;
5043 
5044 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5045 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5046 
5047 			mutex_lock(&adev->srbm_mutex);
5048 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5049 			gfx_v8_0_deactivate_hqd(adev, 2);
5050 			vi_srbm_select(adev, 0, 0, 0, 0);
5051 			mutex_unlock(&adev->srbm_mutex);
5052 		}
5053 		/* Disable MEC parsing/prefetching */
5054 		gfx_v8_0_cp_compute_enable(adev, false);
5055 	}
5056 
5057        return 0;
5058 }
5059 
5060 static int gfx_v8_0_soft_reset(void *handle)
5061 {
5062 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5063 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5064 	u32 tmp;
5065 
5066 	if ((!adev->gfx.grbm_soft_reset) &&
5067 	    (!adev->gfx.srbm_soft_reset))
5068 		return 0;
5069 
5070 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5071 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5072 
5073 	if (grbm_soft_reset || srbm_soft_reset) {
5074 		tmp = RREG32(mmGMCON_DEBUG);
5075 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5076 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5077 		WREG32(mmGMCON_DEBUG, tmp);
5078 		udelay(50);
5079 	}
5080 
5081 	if (grbm_soft_reset) {
5082 		tmp = RREG32(mmGRBM_SOFT_RESET);
5083 		tmp |= grbm_soft_reset;
5084 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5085 		WREG32(mmGRBM_SOFT_RESET, tmp);
5086 		tmp = RREG32(mmGRBM_SOFT_RESET);
5087 
5088 		udelay(50);
5089 
5090 		tmp &= ~grbm_soft_reset;
5091 		WREG32(mmGRBM_SOFT_RESET, tmp);
5092 		tmp = RREG32(mmGRBM_SOFT_RESET);
5093 	}
5094 
5095 	if (srbm_soft_reset) {
5096 		tmp = RREG32(mmSRBM_SOFT_RESET);
5097 		tmp |= srbm_soft_reset;
5098 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5099 		WREG32(mmSRBM_SOFT_RESET, tmp);
5100 		tmp = RREG32(mmSRBM_SOFT_RESET);
5101 
5102 		udelay(50);
5103 
5104 		tmp &= ~srbm_soft_reset;
5105 		WREG32(mmSRBM_SOFT_RESET, tmp);
5106 		tmp = RREG32(mmSRBM_SOFT_RESET);
5107 	}
5108 
5109 	if (grbm_soft_reset || srbm_soft_reset) {
5110 		tmp = RREG32(mmGMCON_DEBUG);
5111 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5112 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5113 		WREG32(mmGMCON_DEBUG, tmp);
5114 	}
5115 
5116 	/* Wait a little for things to settle down */
5117 	udelay(50);
5118 
5119 	return 0;
5120 }
5121 
5122 static int gfx_v8_0_post_soft_reset(void *handle)
5123 {
5124 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5125 	u32 grbm_soft_reset = 0;
5126 
5127 	if ((!adev->gfx.grbm_soft_reset) &&
5128 	    (!adev->gfx.srbm_soft_reset))
5129 		return 0;
5130 
5131 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5132 
5133 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5134 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5135 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5136 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5137 		int i;
5138 
5139 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5140 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5141 
5142 			mutex_lock(&adev->srbm_mutex);
5143 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5144 			gfx_v8_0_deactivate_hqd(adev, 2);
5145 			vi_srbm_select(adev, 0, 0, 0, 0);
5146 			mutex_unlock(&adev->srbm_mutex);
5147 		}
5148 		gfx_v8_0_kiq_resume(adev);
5149 		gfx_v8_0_kcq_resume(adev);
5150 	}
5151 
5152 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5153 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5154 		gfx_v8_0_cp_gfx_resume(adev);
5155 
5156 	gfx_v8_0_cp_test_all_rings(adev);
5157 
5158 	adev->gfx.rlc.funcs->start(adev);
5159 
5160 	return 0;
5161 }
5162 
5163 /**
5164  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5165  *
5166  * @adev: amdgpu_device pointer
5167  *
5168  * Fetches a GPU clock counter snapshot.
5169  * Returns the 64 bit clock counter snapshot.
5170  */
5171 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5172 {
5173 	uint64_t clock;
5174 
5175 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5176 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5177 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5178 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5179 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5180 	return clock;
5181 }
5182 
5183 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5184 					  uint32_t vmid,
5185 					  uint32_t gds_base, uint32_t gds_size,
5186 					  uint32_t gws_base, uint32_t gws_size,
5187 					  uint32_t oa_base, uint32_t oa_size)
5188 {
5189 	/* GDS Base */
5190 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5191 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5192 				WRITE_DATA_DST_SEL(0)));
5193 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5194 	amdgpu_ring_write(ring, 0);
5195 	amdgpu_ring_write(ring, gds_base);
5196 
5197 	/* GDS Size */
5198 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5199 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5200 				WRITE_DATA_DST_SEL(0)));
5201 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5202 	amdgpu_ring_write(ring, 0);
5203 	amdgpu_ring_write(ring, gds_size);
5204 
5205 	/* GWS */
5206 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5207 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5208 				WRITE_DATA_DST_SEL(0)));
5209 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5210 	amdgpu_ring_write(ring, 0);
5211 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5212 
5213 	/* OA */
5214 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5215 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5216 				WRITE_DATA_DST_SEL(0)));
5217 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5218 	amdgpu_ring_write(ring, 0);
5219 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5220 }
5221 
5222 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5223 {
5224 	WREG32(mmSQ_IND_INDEX,
5225 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5226 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5227 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5228 		(SQ_IND_INDEX__FORCE_READ_MASK));
5229 	return RREG32(mmSQ_IND_DATA);
5230 }
5231 
5232 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5233 			   uint32_t wave, uint32_t thread,
5234 			   uint32_t regno, uint32_t num, uint32_t *out)
5235 {
5236 	WREG32(mmSQ_IND_INDEX,
5237 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5238 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5239 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5240 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5241 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5242 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5243 	while (num--)
5244 		*(out++) = RREG32(mmSQ_IND_DATA);
5245 }
5246 
5247 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5248 {
5249 	/* type 0 wave data */
5250 	dst[(*no_fields)++] = 0;
5251 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5252 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5253 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5254 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5255 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5256 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5257 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5258 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5259 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5260 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5261 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5262 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5263 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5264 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5265 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5266 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5267 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5268 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5269 }
5270 
5271 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5272 				     uint32_t wave, uint32_t start,
5273 				     uint32_t size, uint32_t *dst)
5274 {
5275 	wave_read_regs(
5276 		adev, simd, wave, 0,
5277 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5278 }
5279 
5280 
5281 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5282 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5283 	.select_se_sh = &gfx_v8_0_select_se_sh,
5284 	.read_wave_data = &gfx_v8_0_read_wave_data,
5285 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5286 	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5287 };
5288 
5289 static int gfx_v8_0_early_init(void *handle)
5290 {
5291 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5292 
5293 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5294 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5295 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5296 	gfx_v8_0_set_ring_funcs(adev);
5297 	gfx_v8_0_set_irq_funcs(adev);
5298 	gfx_v8_0_set_gds_init(adev);
5299 	gfx_v8_0_set_rlc_funcs(adev);
5300 
5301 	return 0;
5302 }
5303 
5304 static int gfx_v8_0_late_init(void *handle)
5305 {
5306 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5307 	int r;
5308 
5309 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5310 	if (r)
5311 		return r;
5312 
5313 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5314 	if (r)
5315 		return r;
5316 
5317 	/* requires IBs so do in late init after IB pool is initialized */
5318 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5319 	if (r)
5320 		return r;
5321 
5322 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5323 	if (r) {
5324 		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5325 		return r;
5326 	}
5327 
5328 	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5329 	if (r) {
5330 		DRM_ERROR(
5331 			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5332 			r);
5333 		return r;
5334 	}
5335 
5336 	return 0;
5337 }
5338 
5339 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5340 						       bool enable)
5341 {
5342 	if (((adev->asic_type == CHIP_POLARIS11) ||
5343 	    (adev->asic_type == CHIP_POLARIS12) ||
5344 	    (adev->asic_type == CHIP_VEGAM)) &&
5345 	    adev->powerplay.pp_funcs->set_powergating_by_smu)
5346 		/* Send msg to SMU via Powerplay */
5347 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5348 
5349 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5350 }
5351 
5352 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5353 							bool enable)
5354 {
5355 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5356 }
5357 
5358 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5359 		bool enable)
5360 {
5361 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5362 }
5363 
5364 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5365 					  bool enable)
5366 {
5367 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5368 }
5369 
5370 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5371 						bool enable)
5372 {
5373 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5374 
5375 	/* Read any GFX register to wake up GFX. */
5376 	if (!enable)
5377 		RREG32(mmDB_RENDER_CONTROL);
5378 }
5379 
5380 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5381 					  bool enable)
5382 {
5383 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5384 		cz_enable_gfx_cg_power_gating(adev, true);
5385 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5386 			cz_enable_gfx_pipeline_power_gating(adev, true);
5387 	} else {
5388 		cz_enable_gfx_cg_power_gating(adev, false);
5389 		cz_enable_gfx_pipeline_power_gating(adev, false);
5390 	}
5391 }
5392 
5393 static int gfx_v8_0_set_powergating_state(void *handle,
5394 					  enum amd_powergating_state state)
5395 {
5396 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5397 	bool enable = (state == AMD_PG_STATE_GATE);
5398 
5399 	if (amdgpu_sriov_vf(adev))
5400 		return 0;
5401 
5402 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5403 				AMD_PG_SUPPORT_RLC_SMU_HS |
5404 				AMD_PG_SUPPORT_CP |
5405 				AMD_PG_SUPPORT_GFX_DMG))
5406 		amdgpu_gfx_rlc_enter_safe_mode(adev);
5407 	switch (adev->asic_type) {
5408 	case CHIP_CARRIZO:
5409 	case CHIP_STONEY:
5410 
5411 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5412 			cz_enable_sck_slow_down_on_power_up(adev, true);
5413 			cz_enable_sck_slow_down_on_power_down(adev, true);
5414 		} else {
5415 			cz_enable_sck_slow_down_on_power_up(adev, false);
5416 			cz_enable_sck_slow_down_on_power_down(adev, false);
5417 		}
5418 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5419 			cz_enable_cp_power_gating(adev, true);
5420 		else
5421 			cz_enable_cp_power_gating(adev, false);
5422 
5423 		cz_update_gfx_cg_power_gating(adev, enable);
5424 
5425 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5426 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5427 		else
5428 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5429 
5430 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5431 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5432 		else
5433 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5434 		break;
5435 	case CHIP_POLARIS11:
5436 	case CHIP_POLARIS12:
5437 	case CHIP_VEGAM:
5438 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5439 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5440 		else
5441 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5442 
5443 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5444 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5445 		else
5446 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5447 
5448 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5449 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5450 		else
5451 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5452 		break;
5453 	default:
5454 		break;
5455 	}
5456 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5457 				AMD_PG_SUPPORT_RLC_SMU_HS |
5458 				AMD_PG_SUPPORT_CP |
5459 				AMD_PG_SUPPORT_GFX_DMG))
5460 		amdgpu_gfx_rlc_exit_safe_mode(adev);
5461 	return 0;
5462 }
5463 
5464 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5465 {
5466 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5467 	int data;
5468 
5469 	if (amdgpu_sriov_vf(adev))
5470 		*flags = 0;
5471 
5472 	/* AMD_CG_SUPPORT_GFX_MGCG */
5473 	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5474 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5475 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5476 
5477 	/* AMD_CG_SUPPORT_GFX_CGLG */
5478 	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5479 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5480 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5481 
5482 	/* AMD_CG_SUPPORT_GFX_CGLS */
5483 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5484 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5485 
5486 	/* AMD_CG_SUPPORT_GFX_CGTS */
5487 	data = RREG32(mmCGTS_SM_CTRL_REG);
5488 	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5489 		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5490 
5491 	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5492 	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5493 		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5494 
5495 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5496 	data = RREG32(mmRLC_MEM_SLP_CNTL);
5497 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5498 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5499 
5500 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5501 	data = RREG32(mmCP_MEM_SLP_CNTL);
5502 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5503 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5504 }
5505 
5506 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5507 				     uint32_t reg_addr, uint32_t cmd)
5508 {
5509 	uint32_t data;
5510 
5511 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5512 
5513 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5514 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5515 
5516 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5517 	if (adev->asic_type == CHIP_STONEY)
5518 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5519 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5520 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5521 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5522 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5523 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5524 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5525 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5526 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5527 	else
5528 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5529 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5530 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5531 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5532 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5533 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5534 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5535 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5536 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5537 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5538 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5539 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5540 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5541 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5542 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5543 
5544 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5545 }
5546 
/* RLC safe-mode message codes */
#define MSG_ENTER_RLC_SAFE_MODE		1
#define MSG_EXIT_RLC_SAFE_MODE		0

/* RLC_GPR_REG2 layout: REQ in bit 0, MESSAGE in bits 1..4 */
#define RLC_GPR_REG2__REQ_MASK		0x00000001
#define RLC_GPR_REG2__REQ__SHIFT	0
#define RLC_GPR_REG2__MESSAGE__SHIFT	0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK	0x0000001e
5553 
5554 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5555 {
5556 	uint32_t rlc_setting;
5557 
5558 	rlc_setting = RREG32(mmRLC_CNTL);
5559 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5560 		return false;
5561 
5562 	return true;
5563 }
5564 
5565 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5566 {
5567 	uint32_t data;
5568 	unsigned i;
5569 	data = RREG32(mmRLC_CNTL);
5570 	data |= RLC_SAFE_MODE__CMD_MASK;
5571 	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5572 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5573 	WREG32(mmRLC_SAFE_MODE, data);
5574 
5575 	/* wait for RLC_SAFE_MODE */
5576 	for (i = 0; i < adev->usec_timeout; i++) {
5577 		if ((RREG32(mmRLC_GPM_STAT) &
5578 		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5579 		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5580 		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5581 		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5582 			break;
5583 		udelay(1);
5584 	}
5585 	for (i = 0; i < adev->usec_timeout; i++) {
5586 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5587 			break;
5588 		udelay(1);
5589 	}
5590 }
5591 
5592 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5593 {
5594 	uint32_t data;
5595 	unsigned i;
5596 
5597 	data = RREG32(mmRLC_CNTL);
5598 	data |= RLC_SAFE_MODE__CMD_MASK;
5599 	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5600 	WREG32(mmRLC_SAFE_MODE, data);
5601 
5602 	for (i = 0; i < adev->usec_timeout; i++) {
5603 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5604 			break;
5605 		udelay(1);
5606 	}
5607 }
5608 
5609 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5610 	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5611 	.set_safe_mode = gfx_v8_0_set_safe_mode,
5612 	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
5613 	.init = gfx_v8_0_rlc_init,
5614 	.get_csb_size = gfx_v8_0_get_csb_size,
5615 	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
5616 	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5617 	.resume = gfx_v8_0_rlc_resume,
5618 	.stop = gfx_v8_0_rlc_stop,
5619 	.reset = gfx_v8_0_rlc_reset,
5620 	.start = gfx_v8_0_rlc_start
5621 };
5622 
5623 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5624 						      bool enable)
5625 {
5626 	uint32_t temp, data;
5627 
5628 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5629 
5630 	/* It is disabled by HW by default */
5631 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5632 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5633 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5634 				/* 1 - RLC memory Light sleep */
5635 				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5636 
5637 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5638 				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5639 		}
5640 
5641 		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5642 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5643 		if (adev->flags & AMD_IS_APU)
5644 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5645 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5646 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5647 		else
5648 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5649 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5650 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5651 				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5652 
5653 		if (temp != data)
5654 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5655 
5656 		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5657 		gfx_v8_0_wait_for_rlc_serdes(adev);
5658 
5659 		/* 5 - clear mgcg override */
5660 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5661 
5662 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5663 			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5664 			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5665 			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5666 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5667 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5668 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5669 			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5670 			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5671 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5672 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5673 			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5674 			if (temp != data)
5675 				WREG32(mmCGTS_SM_CTRL_REG, data);
5676 		}
5677 		udelay(50);
5678 
5679 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5680 		gfx_v8_0_wait_for_rlc_serdes(adev);
5681 	} else {
5682 		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5683 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5684 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5685 				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5686 				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5687 				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5688 		if (temp != data)
5689 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5690 
5691 		/* 2 - disable MGLS in RLC */
5692 		data = RREG32(mmRLC_MEM_SLP_CNTL);
5693 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5694 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5695 			WREG32(mmRLC_MEM_SLP_CNTL, data);
5696 		}
5697 
5698 		/* 3 - disable MGLS in CP */
5699 		data = RREG32(mmCP_MEM_SLP_CNTL);
5700 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5701 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5702 			WREG32(mmCP_MEM_SLP_CNTL, data);
5703 		}
5704 
5705 		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5706 		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5707 		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5708 				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5709 		if (temp != data)
5710 			WREG32(mmCGTS_SM_CTRL_REG, data);
5711 
5712 		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5713 		gfx_v8_0_wait_for_rlc_serdes(adev);
5714 
5715 		/* 6 - set mgcg override */
5716 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5717 
5718 		udelay(50);
5719 
5720 		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5721 		gfx_v8_0_wait_for_rlc_serdes(adev);
5722 	}
5723 
5724 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5725 }
5726 
5727 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5728 						      bool enable)
5729 {
5730 	uint32_t temp, temp1, data, data1;
5731 
5732 	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5733 
5734 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5735 
5736 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5737 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5738 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5739 		if (temp1 != data1)
5740 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5741 
5742 		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5743 		gfx_v8_0_wait_for_rlc_serdes(adev);
5744 
5745 		/* 2 - clear cgcg override */
5746 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5747 
5748 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5749 		gfx_v8_0_wait_for_rlc_serdes(adev);
5750 
5751 		/* 3 - write cmd to set CGLS */
5752 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5753 
5754 		/* 4 - enable cgcg */
5755 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5756 
5757 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5758 			/* enable cgls*/
5759 			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5760 
5761 			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5762 			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5763 
5764 			if (temp1 != data1)
5765 				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5766 		} else {
5767 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5768 		}
5769 
5770 		if (temp != data)
5771 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5772 
5773 		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5774 		 * Cmp_busy/GFX_Idle interrupts
5775 		 */
5776 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5777 	} else {
5778 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5779 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5780 
5781 		/* TEST CGCG */
5782 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5783 		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5784 				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5785 		if (temp1 != data1)
5786 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5787 
5788 		/* read gfx register to wake up cgcg */
5789 		RREG32(mmCB_CGTT_SCLK_CTRL);
5790 		RREG32(mmCB_CGTT_SCLK_CTRL);
5791 		RREG32(mmCB_CGTT_SCLK_CTRL);
5792 		RREG32(mmCB_CGTT_SCLK_CTRL);
5793 
5794 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5795 		gfx_v8_0_wait_for_rlc_serdes(adev);
5796 
5797 		/* write cmd to Set CGCG Overrride */
5798 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5799 
5800 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5801 		gfx_v8_0_wait_for_rlc_serdes(adev);
5802 
5803 		/* write cmd to Clear CGLS */
5804 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5805 
5806 		/* disable cgcg, cgls should be disabled too. */
5807 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5808 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5809 		if (temp != data)
5810 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5811 		/* enable interrupts again for PG */
5812 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5813 	}
5814 
5815 	gfx_v8_0_wait_for_rlc_serdes(adev);
5816 
5817 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5818 }
/*
 * Apply the medium grain and coarse grain clock gating updates in the order
 * required for the requested direction. Always returns 0.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (!enable) {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, false);
		gfx_v8_0_update_medium_grain_clock_gating(adev, false);
		return 0;
	}

	/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
	 * ===  MGCG + MGLS + TS(CG/LS) ===
	 */
	gfx_v8_0_update_medium_grain_clock_gating(adev, true);
	gfx_v8_0_update_coarse_grain_clock_gating(adev, true);
	return 0;
}
5837 
5838 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5839 					  enum amd_clockgating_state state)
5840 {
5841 	uint32_t msg_id, pp_state = 0;
5842 	uint32_t pp_support_state = 0;
5843 
5844 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5845 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5846 			pp_support_state = PP_STATE_SUPPORT_LS;
5847 			pp_state = PP_STATE_LS;
5848 		}
5849 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5850 			pp_support_state |= PP_STATE_SUPPORT_CG;
5851 			pp_state |= PP_STATE_CG;
5852 		}
5853 		if (state == AMD_CG_STATE_UNGATE)
5854 			pp_state = 0;
5855 
5856 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5857 				PP_BLOCK_GFX_CG,
5858 				pp_support_state,
5859 				pp_state);
5860 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5861 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5862 	}
5863 
5864 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5865 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5866 			pp_support_state = PP_STATE_SUPPORT_LS;
5867 			pp_state = PP_STATE_LS;
5868 		}
5869 
5870 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5871 			pp_support_state |= PP_STATE_SUPPORT_CG;
5872 			pp_state |= PP_STATE_CG;
5873 		}
5874 
5875 		if (state == AMD_CG_STATE_UNGATE)
5876 			pp_state = 0;
5877 
5878 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5879 				PP_BLOCK_GFX_MG,
5880 				pp_support_state,
5881 				pp_state);
5882 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5883 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5884 	}
5885 
5886 	return 0;
5887 }
5888 
5889 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5890 					  enum amd_clockgating_state state)
5891 {
5892 
5893 	uint32_t msg_id, pp_state = 0;
5894 	uint32_t pp_support_state = 0;
5895 
5896 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5897 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5898 			pp_support_state = PP_STATE_SUPPORT_LS;
5899 			pp_state = PP_STATE_LS;
5900 		}
5901 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5902 			pp_support_state |= PP_STATE_SUPPORT_CG;
5903 			pp_state |= PP_STATE_CG;
5904 		}
5905 		if (state == AMD_CG_STATE_UNGATE)
5906 			pp_state = 0;
5907 
5908 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5909 				PP_BLOCK_GFX_CG,
5910 				pp_support_state,
5911 				pp_state);
5912 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5913 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5914 	}
5915 
5916 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5917 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5918 			pp_support_state = PP_STATE_SUPPORT_LS;
5919 			pp_state = PP_STATE_LS;
5920 		}
5921 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5922 			pp_support_state |= PP_STATE_SUPPORT_CG;
5923 			pp_state |= PP_STATE_CG;
5924 		}
5925 		if (state == AMD_CG_STATE_UNGATE)
5926 			pp_state = 0;
5927 
5928 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5929 				PP_BLOCK_GFX_3D,
5930 				pp_support_state,
5931 				pp_state);
5932 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5933 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5934 	}
5935 
5936 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5937 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5938 			pp_support_state = PP_STATE_SUPPORT_LS;
5939 			pp_state = PP_STATE_LS;
5940 		}
5941 
5942 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5943 			pp_support_state |= PP_STATE_SUPPORT_CG;
5944 			pp_state |= PP_STATE_CG;
5945 		}
5946 
5947 		if (state == AMD_CG_STATE_UNGATE)
5948 			pp_state = 0;
5949 
5950 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5951 				PP_BLOCK_GFX_MG,
5952 				pp_support_state,
5953 				pp_state);
5954 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5955 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5956 	}
5957 
5958 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5959 		pp_support_state = PP_STATE_SUPPORT_LS;
5960 
5961 		if (state == AMD_CG_STATE_UNGATE)
5962 			pp_state = 0;
5963 		else
5964 			pp_state = PP_STATE_LS;
5965 
5966 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5967 				PP_BLOCK_GFX_RLC,
5968 				pp_support_state,
5969 				pp_state);
5970 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5971 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5972 	}
5973 
5974 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5975 		pp_support_state = PP_STATE_SUPPORT_LS;
5976 
5977 		if (state == AMD_CG_STATE_UNGATE)
5978 			pp_state = 0;
5979 		else
5980 			pp_state = PP_STATE_LS;
5981 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5982 			PP_BLOCK_GFX_CP,
5983 			pp_support_state,
5984 			pp_state);
5985 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5986 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5987 	}
5988 
5989 	return 0;
5990 }
5991 
5992 static int gfx_v8_0_set_clockgating_state(void *handle,
5993 					  enum amd_clockgating_state state)
5994 {
5995 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5996 
5997 	if (amdgpu_sriov_vf(adev))
5998 		return 0;
5999 
6000 	switch (adev->asic_type) {
6001 	case CHIP_FIJI:
6002 	case CHIP_CARRIZO:
6003 	case CHIP_STONEY:
6004 		gfx_v8_0_update_gfx_clock_gating(adev,
6005 						 state == AMD_CG_STATE_GATE);
6006 		break;
6007 	case CHIP_TONGA:
6008 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6009 		break;
6010 	case CHIP_POLARIS10:
6011 	case CHIP_POLARIS11:
6012 	case CHIP_POLARIS12:
6013 	case CHIP_VEGAM:
6014 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6015 		break;
6016 	default:
6017 		break;
6018 	}
6019 	return 0;
6020 }
6021 
6022 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6023 {
6024 	return ring->adev->wb.wb[ring->rptr_offs];
6025 }
6026 
6027 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6028 {
6029 	struct amdgpu_device *adev = ring->adev;
6030 
6031 	if (ring->use_doorbell)
6032 		/* XXX check if swapping is necessary on BE */
6033 		return ring->adev->wb.wb[ring->wptr_offs];
6034 	else
6035 		return RREG32(mmCP_RB0_WPTR);
6036 }
6037 
6038 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6039 {
6040 	struct amdgpu_device *adev = ring->adev;
6041 
6042 	if (ring->use_doorbell) {
6043 		/* XXX check if swapping is necessary on BE */
6044 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6045 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6046 	} else {
6047 		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6048 		(void)RREG32(mmCP_RB0_WPTR);
6049 	}
6050 }
6051 
6052 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6053 {
6054 	u32 ref_and_mask, reg_mem_engine;
6055 
6056 	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6057 	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6058 		switch (ring->me) {
6059 		case 1:
6060 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6061 			break;
6062 		case 2:
6063 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6064 			break;
6065 		default:
6066 			return;
6067 		}
6068 		reg_mem_engine = 0;
6069 	} else {
6070 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6071 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6072 	}
6073 
6074 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6075 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6076 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6077 				 reg_mem_engine));
6078 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6079 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6080 	amdgpu_ring_write(ring, ref_and_mask);
6081 	amdgpu_ring_write(ring, ref_and_mask);
6082 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6083 }
6084 
6085 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6086 {
6087 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6088 	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6089 		EVENT_INDEX(4));
6090 
6091 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6092 	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6093 		EVENT_INDEX(0));
6094 }
6095 
6096 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6097 					struct amdgpu_job *job,
6098 					struct amdgpu_ib *ib,
6099 					uint32_t flags)
6100 {
6101 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6102 	u32 header, control = 0;
6103 
6104 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6105 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6106 	else
6107 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6108 
6109 	control |= ib->length_dw | (vmid << 24);
6110 
6111 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6112 		control |= INDIRECT_BUFFER_PRE_ENB(1);
6113 
6114 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6115 			gfx_v8_0_ring_emit_de_meta(ring);
6116 	}
6117 
6118 	amdgpu_ring_write(ring, header);
6119 	amdgpu_ring_write(ring,
6120 #ifdef __BIG_ENDIAN
6121 			  (2 << 0) |
6122 #endif
6123 			  (ib->gpu_addr & 0xFFFFFFFC));
6124 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6125 	amdgpu_ring_write(ring, control);
6126 }
6127 
6128 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6129 					  struct amdgpu_job *job,
6130 					  struct amdgpu_ib *ib,
6131 					  uint32_t flags)
6132 {
6133 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6134 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6135 
6136 	/* Currently, there is a high possibility to get wave ID mismatch
6137 	 * between ME and GDS, leading to a hw deadlock, because ME generates
6138 	 * different wave IDs than the GDS expects. This situation happens
6139 	 * randomly when at least 5 compute pipes use GDS ordered append.
6140 	 * The wave IDs generated by ME are also wrong after suspend/resume.
6141 	 * Those are probably bugs somewhere else in the kernel driver.
6142 	 *
6143 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6144 	 * GDS to 0 for this ring (me/pipe).
6145 	 */
6146 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6147 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6148 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6149 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6150 	}
6151 
6152 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6153 	amdgpu_ring_write(ring,
6154 #ifdef __BIG_ENDIAN
6155 				(2 << 0) |
6156 #endif
6157 				(ib->gpu_addr & 0xFFFFFFFC));
6158 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6159 	amdgpu_ring_write(ring, control);
6160 }
6161 
6162 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6163 					 u64 seq, unsigned flags)
6164 {
6165 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6166 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6167 
6168 	/* EVENT_WRITE_EOP - flush caches, send int */
6169 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6170 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6171 				 EOP_TC_ACTION_EN |
6172 				 EOP_TC_WB_ACTION_EN |
6173 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6174 				 EVENT_INDEX(5)));
6175 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6176 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6177 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6178 	amdgpu_ring_write(ring, lower_32_bits(seq));
6179 	amdgpu_ring_write(ring, upper_32_bits(seq));
6180 
6181 }
6182 
6183 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6184 {
6185 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6186 	uint32_t seq = ring->fence_drv.sync_seq;
6187 	uint64_t addr = ring->fence_drv.gpu_addr;
6188 
6189 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6190 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6191 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6192 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6193 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6194 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6195 	amdgpu_ring_write(ring, seq);
6196 	amdgpu_ring_write(ring, 0xffffffff);
6197 	amdgpu_ring_write(ring, 4); /* poll interval */
6198 }
6199 
6200 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6201 					unsigned vmid, uint64_t pd_addr)
6202 {
6203 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6204 
6205 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6206 
6207 	/* wait for the invalidate to complete */
6208 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6209 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6210 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6211 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6212 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6213 	amdgpu_ring_write(ring, 0);
6214 	amdgpu_ring_write(ring, 0); /* ref */
6215 	amdgpu_ring_write(ring, 0); /* mask */
6216 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6217 
6218 	/* compute doesn't have PFP */
6219 	if (usepfp) {
6220 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6221 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6222 		amdgpu_ring_write(ring, 0x0);
6223 	}
6224 }
6225 
6226 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6227 {
6228 	return ring->adev->wb.wb[ring->wptr_offs];
6229 }
6230 
6231 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6232 {
6233 	struct amdgpu_device *adev = ring->adev;
6234 
6235 	/* XXX check if swapping is necessary on BE */
6236 	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6237 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6238 }
6239 
6240 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6241 					   bool acquire)
6242 {
6243 	struct amdgpu_device *adev = ring->adev;
6244 	int pipe_num, tmp, reg;
6245 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6246 
6247 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6248 
6249 	/* first me only has 2 entries, GFX and HP3D */
6250 	if (ring->me > 0)
6251 		pipe_num -= 2;
6252 
6253 	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6254 	tmp = RREG32(reg);
6255 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6256 	WREG32(reg, tmp);
6257 }
6258 
6259 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6260 					    struct amdgpu_ring *ring,
6261 					    bool acquire)
6262 {
6263 	int i, pipe;
6264 	bool reserve;
6265 	struct amdgpu_ring *iring;
6266 
6267 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
6268 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
6269 	if (acquire)
6270 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6271 	else
6272 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6273 
6274 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6275 		/* Clear all reservations - everyone reacquires all resources */
6276 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6277 			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6278 						       true);
6279 
6280 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6281 			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6282 						       true);
6283 	} else {
6284 		/* Lower all pipes without a current reservation */
6285 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6286 			iring = &adev->gfx.gfx_ring[i];
6287 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6288 							   iring->me,
6289 							   iring->pipe,
6290 							   0);
6291 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6292 			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6293 		}
6294 
6295 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6296 			iring = &adev->gfx.compute_ring[i];
6297 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6298 							   iring->me,
6299 							   iring->pipe,
6300 							   0);
6301 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6302 			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6303 		}
6304 	}
6305 
6306 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6307 }
6308 
6309 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6310 				      struct amdgpu_ring *ring,
6311 				      bool acquire)
6312 {
6313 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6314 	uint32_t queue_priority = acquire ? 0xf : 0x0;
6315 
6316 	mutex_lock(&adev->srbm_mutex);
6317 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6318 
6319 	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6320 	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6321 
6322 	vi_srbm_select(adev, 0, 0, 0, 0);
6323 	mutex_unlock(&adev->srbm_mutex);
6324 }
6325 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6326 					       enum drm_sched_priority priority)
6327 {
6328 	struct amdgpu_device *adev = ring->adev;
6329 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6330 
6331 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6332 		return;
6333 
6334 	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6335 	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6336 }
6337 
6338 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6339 					     u64 addr, u64 seq,
6340 					     unsigned flags)
6341 {
6342 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6343 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6344 
6345 	/* RELEASE_MEM - flush caches, send int */
6346 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6347 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6348 				 EOP_TC_ACTION_EN |
6349 				 EOP_TC_WB_ACTION_EN |
6350 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6351 				 EVENT_INDEX(5)));
6352 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6353 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6354 	amdgpu_ring_write(ring, upper_32_bits(addr));
6355 	amdgpu_ring_write(ring, lower_32_bits(seq));
6356 	amdgpu_ring_write(ring, upper_32_bits(seq));
6357 }
6358 
6359 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6360 					 u64 seq, unsigned int flags)
6361 {
6362 	/* we only allocate 32bit for each seq wb address */
6363 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6364 
6365 	/* write fence seq to the "addr" */
6366 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6367 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6368 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6369 	amdgpu_ring_write(ring, lower_32_bits(addr));
6370 	amdgpu_ring_write(ring, upper_32_bits(addr));
6371 	amdgpu_ring_write(ring, lower_32_bits(seq));
6372 
6373 	if (flags & AMDGPU_FENCE_FLAG_INT) {
6374 		/* set register to trigger INT */
6375 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6376 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6377 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6378 		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6379 		amdgpu_ring_write(ring, 0);
6380 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6381 	}
6382 }
6383 
6384 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6385 {
6386 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6387 	amdgpu_ring_write(ring, 0);
6388 }
6389 
6390 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6391 {
6392 	uint32_t dw2 = 0;
6393 
6394 	if (amdgpu_sriov_vf(ring->adev))
6395 		gfx_v8_0_ring_emit_ce_meta(ring);
6396 
6397 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6398 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6399 		gfx_v8_0_ring_emit_vgt_flush(ring);
6400 		/* set load_global_config & load_global_uconfig */
6401 		dw2 |= 0x8001;
6402 		/* set load_cs_sh_regs */
6403 		dw2 |= 0x01000000;
6404 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6405 		dw2 |= 0x10002;
6406 
6407 		/* set load_ce_ram if preamble presented */
6408 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6409 			dw2 |= 0x10000000;
6410 	} else {
6411 		/* still load_ce_ram if this is the first time preamble presented
6412 		 * although there is no context switch happens.
6413 		 */
6414 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6415 			dw2 |= 0x10000000;
6416 	}
6417 
6418 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6419 	amdgpu_ring_write(ring, dw2);
6420 	amdgpu_ring_write(ring, 0);
6421 }
6422 
6423 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6424 {
6425 	unsigned ret;
6426 
6427 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6428 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6429 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6430 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6431 	ret = ring->wptr & ring->buf_mask;
6432 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6433 	return ret;
6434 }
6435 
6436 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6437 {
6438 	unsigned cur;
6439 
6440 	BUG_ON(offset > ring->buf_mask);
6441 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6442 
6443 	cur = (ring->wptr & ring->buf_mask) - 1;
6444 	if (likely(cur > offset))
6445 		ring->ring[offset] = cur - offset;
6446 	else
6447 		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6448 }
6449 
6450 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6451 {
6452 	struct amdgpu_device *adev = ring->adev;
6453 
6454 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6455 	amdgpu_ring_write(ring, 0 |	/* src: register*/
6456 				(5 << 8) |	/* dst: memory */
6457 				(1 << 20));	/* write confirm */
6458 	amdgpu_ring_write(ring, reg);
6459 	amdgpu_ring_write(ring, 0);
6460 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6461 				adev->virt.reg_val_offs * 4));
6462 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6463 				adev->virt.reg_val_offs * 4));
6464 }
6465 
6466 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6467 				  uint32_t val)
6468 {
6469 	uint32_t cmd;
6470 
6471 	switch (ring->funcs->type) {
6472 	case AMDGPU_RING_TYPE_GFX:
6473 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6474 		break;
6475 	case AMDGPU_RING_TYPE_KIQ:
6476 		cmd = 1 << 16; /* no inc addr */
6477 		break;
6478 	default:
6479 		cmd = WR_CONFIRM;
6480 		break;
6481 	}
6482 
6483 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6484 	amdgpu_ring_write(ring, cmd);
6485 	amdgpu_ring_write(ring, reg);
6486 	amdgpu_ring_write(ring, 0);
6487 	amdgpu_ring_write(ring, val);
6488 }
6489 
6490 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6491 {
6492 	struct amdgpu_device *adev = ring->adev;
6493 	uint32_t value = 0;
6494 
6495 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6496 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6497 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6498 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6499 	WREG32(mmSQ_CMD, value);
6500 }
6501 
6502 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6503 						 enum amdgpu_interrupt_state state)
6504 {
6505 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6506 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6507 }
6508 
6509 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6510 						     int me, int pipe,
6511 						     enum amdgpu_interrupt_state state)
6512 {
6513 	u32 mec_int_cntl, mec_int_cntl_reg;
6514 
6515 	/*
6516 	 * amdgpu controls only the first MEC. That's why this function only
6517 	 * handles the setting of interrupts for this specific MEC. All other
6518 	 * pipes' interrupts are set by amdkfd.
6519 	 */
6520 
6521 	if (me == 1) {
6522 		switch (pipe) {
6523 		case 0:
6524 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6525 			break;
6526 		case 1:
6527 			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6528 			break;
6529 		case 2:
6530 			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6531 			break;
6532 		case 3:
6533 			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6534 			break;
6535 		default:
6536 			DRM_DEBUG("invalid pipe %d\n", pipe);
6537 			return;
6538 		}
6539 	} else {
6540 		DRM_DEBUG("invalid me %d\n", me);
6541 		return;
6542 	}
6543 
6544 	switch (state) {
6545 	case AMDGPU_IRQ_STATE_DISABLE:
6546 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6547 		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6548 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6549 		break;
6550 	case AMDGPU_IRQ_STATE_ENABLE:
6551 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6552 		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6553 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6554 		break;
6555 	default:
6556 		break;
6557 	}
6558 }
6559 
6560 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6561 					     struct amdgpu_irq_src *source,
6562 					     unsigned type,
6563 					     enum amdgpu_interrupt_state state)
6564 {
6565 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6566 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6567 
6568 	return 0;
6569 }
6570 
6571 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6572 					      struct amdgpu_irq_src *source,
6573 					      unsigned type,
6574 					      enum amdgpu_interrupt_state state)
6575 {
6576 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6577 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6578 
6579 	return 0;
6580 }
6581 
6582 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6583 					    struct amdgpu_irq_src *src,
6584 					    unsigned type,
6585 					    enum amdgpu_interrupt_state state)
6586 {
6587 	switch (type) {
6588 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6589 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6590 		break;
6591 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6592 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6593 		break;
6594 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6595 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6596 		break;
6597 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6598 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6599 		break;
6600 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6601 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6602 		break;
6603 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6604 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6605 		break;
6606 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6607 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6608 		break;
6609 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6610 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6611 		break;
6612 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6613 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6614 		break;
6615 	default:
6616 		break;
6617 	}
6618 	return 0;
6619 }
6620 
6621 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6622 					 struct amdgpu_irq_src *source,
6623 					 unsigned int type,
6624 					 enum amdgpu_interrupt_state state)
6625 {
6626 	int enable_flag;
6627 
6628 	switch (state) {
6629 	case AMDGPU_IRQ_STATE_DISABLE:
6630 		enable_flag = 0;
6631 		break;
6632 
6633 	case AMDGPU_IRQ_STATE_ENABLE:
6634 		enable_flag = 1;
6635 		break;
6636 
6637 	default:
6638 		return -EINVAL;
6639 	}
6640 
6641 	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6642 	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6643 	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6644 	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6645 	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6646 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6647 		     enable_flag);
6648 	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6649 		     enable_flag);
6650 	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6651 		     enable_flag);
6652 	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6653 		     enable_flag);
6654 	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6655 		     enable_flag);
6656 	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6657 		     enable_flag);
6658 	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6659 		     enable_flag);
6660 	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6661 		     enable_flag);
6662 
6663 	return 0;
6664 }
6665 
6666 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6667 				     struct amdgpu_irq_src *source,
6668 				     unsigned int type,
6669 				     enum amdgpu_interrupt_state state)
6670 {
6671 	int enable_flag;
6672 
6673 	switch (state) {
6674 	case AMDGPU_IRQ_STATE_DISABLE:
6675 		enable_flag = 1;
6676 		break;
6677 
6678 	case AMDGPU_IRQ_STATE_ENABLE:
6679 		enable_flag = 0;
6680 		break;
6681 
6682 	default:
6683 		return -EINVAL;
6684 	}
6685 
6686 	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6687 		     enable_flag);
6688 
6689 	return 0;
6690 }
6691 
6692 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6693 			    struct amdgpu_irq_src *source,
6694 			    struct amdgpu_iv_entry *entry)
6695 {
6696 	int i;
6697 	u8 me_id, pipe_id, queue_id;
6698 	struct amdgpu_ring *ring;
6699 
6700 	DRM_DEBUG("IH: CP EOP\n");
6701 	me_id = (entry->ring_id & 0x0c) >> 2;
6702 	pipe_id = (entry->ring_id & 0x03) >> 0;
6703 	queue_id = (entry->ring_id & 0x70) >> 4;
6704 
6705 	switch (me_id) {
6706 	case 0:
6707 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6708 		break;
6709 	case 1:
6710 	case 2:
6711 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6712 			ring = &adev->gfx.compute_ring[i];
6713 			/* Per-queue interrupt is supported for MEC starting from VI.
6714 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6715 			  */
6716 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6717 				amdgpu_fence_process(ring);
6718 		}
6719 		break;
6720 	}
6721 	return 0;
6722 }
6723 
6724 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6725 			   struct amdgpu_iv_entry *entry)
6726 {
6727 	u8 me_id, pipe_id, queue_id;
6728 	struct amdgpu_ring *ring;
6729 	int i;
6730 
6731 	me_id = (entry->ring_id & 0x0c) >> 2;
6732 	pipe_id = (entry->ring_id & 0x03) >> 0;
6733 	queue_id = (entry->ring_id & 0x70) >> 4;
6734 
6735 	switch (me_id) {
6736 	case 0:
6737 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6738 		break;
6739 	case 1:
6740 	case 2:
6741 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6742 			ring = &adev->gfx.compute_ring[i];
6743 			if (ring->me == me_id && ring->pipe == pipe_id &&
6744 			    ring->queue == queue_id)
6745 				drm_sched_fault(&ring->sched);
6746 		}
6747 		break;
6748 	}
6749 }
6750 
/*
 * Privileged-register fault handler: log the error and forward the entry
 * to gfx_v8_0_fault(). Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	/* Let the scheduler of the offending ring know. */
	gfx_v8_0_fault(adev, entry);

	return 0;
}
6759 
/*
 * Privileged-instruction fault handler: log the error and forward the entry
 * to gfx_v8_0_fault(). Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	/* Let the scheduler of the offending ring know. */
	gfx_v8_0_fault(adev, entry);

	return 0;
}
6768 
/*
 * CP EDC/ECC error interrupt handler: only logs the event.
 * Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* Terminate the message so it is not merged with the next log line. */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6776 
6777 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6778 {
6779 	u32 enc, se_id, sh_id, cu_id;
6780 	char type[20];
6781 	int sq_edc_source = -1;
6782 
6783 	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6784 	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6785 
6786 	switch (enc) {
6787 		case 0:
6788 			DRM_INFO("SQ general purpose intr detected:"
6789 					"se_id %d, immed_overflow %d, host_reg_overflow %d,"
6790 					"host_cmd_overflow %d, cmd_timestamp %d,"
6791 					"reg_timestamp %d, thread_trace_buff_full %d,"
6792 					"wlt %d, thread_trace %d.\n",
6793 					se_id,
6794 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6795 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6796 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6797 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6798 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6799 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6800 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6801 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6802 					);
6803 			break;
6804 		case 1:
6805 		case 2:
6806 
6807 			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6808 			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6809 
6810 			/*
6811 			 * This function can be called either directly from ISR
6812 			 * or from BH in which case we can access SQ_EDC_INFO
6813 			 * instance
6814 			 */
6815 			if (in_task()) {
6816 				mutex_lock(&adev->grbm_idx_mutex);
6817 				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6818 
6819 				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6820 
6821 				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6822 				mutex_unlock(&adev->grbm_idx_mutex);
6823 			}
6824 
6825 			if (enc == 1)
6826 				sprintf(type, "instruction intr");
6827 			else
6828 				sprintf(type, "EDC/ECC error");
6829 
6830 			DRM_INFO(
6831 				"SQ %s detected: "
6832 					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6833 					"trap %s, sq_ed_info.source %s.\n",
6834 					type, se_id, sh_id, cu_id,
6835 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6836 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6837 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6838 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6839 					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6840 				);
6841 			break;
6842 		default:
6843 			DRM_ERROR("SQ invalid encoding type\n.");
6844 	}
6845 }
6846 
6847 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6848 {
6849 
6850 	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6851 	struct sq_work *sq_work = container_of(work, struct sq_work, work);
6852 
6853 	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6854 }
6855 
6856 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6857 			   struct amdgpu_irq_src *source,
6858 			   struct amdgpu_iv_entry *entry)
6859 {
6860 	unsigned ih_data = entry->src_data[0];
6861 
6862 	/*
6863 	 * Try to submit work so SQ_EDC_INFO can be accessed from
6864 	 * BH. If previous work submission hasn't finished yet
6865 	 * just print whatever info is possible directly from the ISR.
6866 	 */
6867 	if (work_pending(&adev->gfx.sq_work.work)) {
6868 		gfx_v8_0_parse_sq_irq(adev, ih_data);
6869 	} else {
6870 		adev->gfx.sq_work.ih_data = ih_data;
6871 		schedule_work(&adev->gfx.sq_work.work);
6872 	}
6873 
6874 	return 0;
6875 }
6876 
6877 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6878 	.name = "gfx_v8_0",
6879 	.early_init = gfx_v8_0_early_init,
6880 	.late_init = gfx_v8_0_late_init,
6881 	.sw_init = gfx_v8_0_sw_init,
6882 	.sw_fini = gfx_v8_0_sw_fini,
6883 	.hw_init = gfx_v8_0_hw_init,
6884 	.hw_fini = gfx_v8_0_hw_fini,
6885 	.suspend = gfx_v8_0_suspend,
6886 	.resume = gfx_v8_0_resume,
6887 	.is_idle = gfx_v8_0_is_idle,
6888 	.wait_for_idle = gfx_v8_0_wait_for_idle,
6889 	.check_soft_reset = gfx_v8_0_check_soft_reset,
6890 	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6891 	.soft_reset = gfx_v8_0_soft_reset,
6892 	.post_soft_reset = gfx_v8_0_post_soft_reset,
6893 	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6894 	.set_powergating_state = gfx_v8_0_set_powergating_state,
6895 	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6896 };
6897 
6898 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6899 	.type = AMDGPU_RING_TYPE_GFX,
6900 	.align_mask = 0xff,
6901 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6902 	.support_64bit_ptrs = false,
6903 	.get_rptr = gfx_v8_0_ring_get_rptr,
6904 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6905 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6906 	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
6907 		5 +  /* COND_EXEC */
6908 		7 +  /* PIPELINE_SYNC */
6909 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6910 		8 +  /* FENCE for VM_FLUSH */
6911 		20 + /* GDS switch */
6912 		4 + /* double SWITCH_BUFFER,
6913 		       the first COND_EXEC jump to the place just
6914 			   prior to this double SWITCH_BUFFER  */
6915 		5 + /* COND_EXEC */
6916 		7 +	 /*	HDP_flush */
6917 		4 +	 /*	VGT_flush */
6918 		14 + /*	CE_META */
6919 		31 + /*	DE_META */
6920 		3 + /* CNTX_CTRL */
6921 		5 + /* HDP_INVL */
6922 		8 + 8 + /* FENCE x2 */
6923 		2, /* SWITCH_BUFFER */
6924 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6925 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6926 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6927 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6928 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6929 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6930 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6931 	.test_ring = gfx_v8_0_ring_test_ring,
6932 	.test_ib = gfx_v8_0_ring_test_ib,
6933 	.insert_nop = amdgpu_ring_insert_nop,
6934 	.pad_ib = amdgpu_ring_generic_pad_ib,
6935 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6936 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6937 	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6938 	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6939 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6940 	.soft_recovery = gfx_v8_0_ring_soft_recovery,
6941 };
6942 
6943 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6944 	.type = AMDGPU_RING_TYPE_COMPUTE,
6945 	.align_mask = 0xff,
6946 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6947 	.support_64bit_ptrs = false,
6948 	.get_rptr = gfx_v8_0_ring_get_rptr,
6949 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6950 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6951 	.emit_frame_size =
6952 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6953 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6954 		5 + /* hdp_invalidate */
6955 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6956 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6957 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6958 	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6959 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6960 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6961 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6962 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6963 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6964 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6965 	.test_ring = gfx_v8_0_ring_test_ring,
6966 	.test_ib = gfx_v8_0_ring_test_ib,
6967 	.insert_nop = amdgpu_ring_insert_nop,
6968 	.pad_ib = amdgpu_ring_generic_pad_ib,
6969 	.set_priority = gfx_v8_0_ring_set_priority_compute,
6970 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6971 };
6972 
6973 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6974 	.type = AMDGPU_RING_TYPE_KIQ,
6975 	.align_mask = 0xff,
6976 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6977 	.support_64bit_ptrs = false,
6978 	.get_rptr = gfx_v8_0_ring_get_rptr,
6979 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6980 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6981 	.emit_frame_size =
6982 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6983 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6984 		5 + /* hdp_invalidate */
6985 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6986 		17 + /* gfx_v8_0_ring_emit_vm_flush */
6987 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6988 	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6989 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6990 	.test_ring = gfx_v8_0_ring_test_ring,
6991 	.insert_nop = amdgpu_ring_insert_nop,
6992 	.pad_ib = amdgpu_ring_generic_pad_ib,
6993 	.emit_rreg = gfx_v8_0_ring_emit_rreg,
6994 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6995 };
6996 
6997 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6998 {
6999 	int i;
7000 
7001 	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7002 
7003 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7004 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7005 
7006 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7007 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7008 }
7009 
7010 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7011 	.set = gfx_v8_0_set_eop_interrupt_state,
7012 	.process = gfx_v8_0_eop_irq,
7013 };
7014 
7015 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7016 	.set = gfx_v8_0_set_priv_reg_fault_state,
7017 	.process = gfx_v8_0_priv_reg_irq,
7018 };
7019 
7020 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7021 	.set = gfx_v8_0_set_priv_inst_fault_state,
7022 	.process = gfx_v8_0_priv_inst_irq,
7023 };
7024 
7025 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7026 	.set = gfx_v8_0_set_cp_ecc_int_state,
7027 	.process = gfx_v8_0_cp_ecc_error_irq,
7028 };
7029 
7030 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7031 	.set = gfx_v8_0_set_sq_int_state,
7032 	.process = gfx_v8_0_sq_irq,
7033 };
7034 
7035 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7036 {
7037 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7038 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7039 
7040 	adev->gfx.priv_reg_irq.num_types = 1;
7041 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7042 
7043 	adev->gfx.priv_inst_irq.num_types = 1;
7044 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7045 
7046 	adev->gfx.cp_ecc_error_irq.num_types = 1;
7047 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7048 
7049 	adev->gfx.sq_irq.num_types = 1;
7050 	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7051 }
7052 
7053 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7054 {
7055 	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7056 }
7057 
7058 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7059 {
7060 	/* init asci gds info */
7061 	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7062 	adev->gds.gws_size = 64;
7063 	adev->gds.oa_size = 16;
7064 	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7065 }
7066 
7067 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7068 						 u32 bitmap)
7069 {
7070 	u32 data;
7071 
7072 	if (!bitmap)
7073 		return;
7074 
7075 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7076 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7077 
7078 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7079 }
7080 
7081 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7082 {
7083 	u32 data, mask;
7084 
7085 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7086 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7087 
7088 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7089 
7090 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7091 }
7092 
7093 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7094 {
7095 	int i, j, k, counter, active_cu_number = 0;
7096 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7097 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7098 	unsigned disable_masks[4 * 2];
7099 	u32 ao_cu_num;
7100 
7101 	memset(cu_info, 0, sizeof(*cu_info));
7102 
7103 	if (adev->flags & AMD_IS_APU)
7104 		ao_cu_num = 2;
7105 	else
7106 		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7107 
7108 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7109 
7110 	mutex_lock(&adev->grbm_idx_mutex);
7111 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7112 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7113 			mask = 1;
7114 			ao_bitmap = 0;
7115 			counter = 0;
7116 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7117 			if (i < 4 && j < 2)
7118 				gfx_v8_0_set_user_cu_inactive_bitmap(
7119 					adev, disable_masks[i * 2 + j]);
7120 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7121 			cu_info->bitmap[i][j] = bitmap;
7122 
7123 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7124 				if (bitmap & mask) {
7125 					if (counter < ao_cu_num)
7126 						ao_bitmap |= mask;
7127 					counter ++;
7128 				}
7129 				mask <<= 1;
7130 			}
7131 			active_cu_number += counter;
7132 			if (i < 2 && j < 2)
7133 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7134 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7135 		}
7136 	}
7137 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7138 	mutex_unlock(&adev->grbm_idx_mutex);
7139 
7140 	cu_info->number = active_cu_number;
7141 	cu_info->ao_cu_mask = ao_cu_mask;
7142 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7143 	cu_info->max_waves_per_simd = 10;
7144 	cu_info->max_scratch_slots_per_cu = 32;
7145 	cu_info->wave_front_size = 64;
7146 	cu_info->lds_size = 64;
7147 }
7148 
7149 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7150 {
7151 	.type = AMD_IP_BLOCK_TYPE_GFX,
7152 	.major = 8,
7153 	.minor = 0,
7154 	.rev = 0,
7155 	.funcs = &gfx_v8_0_ip_funcs,
7156 };
7157 
7158 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7159 {
7160 	.type = AMD_IP_BLOCK_TYPE_GFX,
7161 	.major = 8,
7162 	.minor = 1,
7163 	.rev = 0,
7164 	.funcs = &gfx_v8_0_ip_funcs,
7165 };
7166 
7167 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7168 {
7169 	uint64_t ce_payload_addr;
7170 	int cnt_ce;
7171 	union {
7172 		struct vi_ce_ib_state regular;
7173 		struct vi_ce_ib_state_chained_ib chained;
7174 	} ce_payload = {};
7175 
7176 	if (ring->adev->virt.chained_ib_support) {
7177 		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7178 			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7179 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7180 	} else {
7181 		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7182 			offsetof(struct vi_gfx_meta_data, ce_payload);
7183 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7184 	}
7185 
7186 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7187 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7188 				WRITE_DATA_DST_SEL(8) |
7189 				WR_CONFIRM) |
7190 				WRITE_DATA_CACHE_POLICY(0));
7191 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7192 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7193 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7194 }
7195 
7196 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7197 {
7198 	uint64_t de_payload_addr, gds_addr, csa_addr;
7199 	int cnt_de;
7200 	union {
7201 		struct vi_de_ib_state regular;
7202 		struct vi_de_ib_state_chained_ib chained;
7203 	} de_payload = {};
7204 
7205 	csa_addr = amdgpu_csa_vaddr(ring->adev);
7206 	gds_addr = csa_addr + 4096;
7207 	if (ring->adev->virt.chained_ib_support) {
7208 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7209 		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7210 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7211 		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7212 	} else {
7213 		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7214 		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7215 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7216 		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7217 	}
7218 
7219 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7220 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7221 				WRITE_DATA_DST_SEL(8) |
7222 				WR_CONFIRM) |
7223 				WRITE_DATA_CACHE_POLICY(0));
7224 	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7225 	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7226 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7227 }
7228