xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c (revision de2bdb3d)
1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "amdgpu.h"
25 #include "amdgpu_ih.h"
26 #include "amdgpu_gfx.h"
27 #include "amdgpu_ucode.h"
28 #include "si/clearstate_si.h"
29 #include "si/sid.h"
30 
31 #define GFX6_NUM_GFX_RINGS     1
32 #define GFX6_NUM_COMPUTE_RINGS 2
33 #define STATIC_PER_CU_PG_ENABLE                    (1 << 3)
34 #define DYN_PER_CU_PG_ENABLE                       (1 << 2)
35 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
36 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
37 
38 
39 static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
40 static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
41 static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev);
42 
43 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
44 MODULE_FIRMWARE("radeon/tahiti_me.bin");
45 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
46 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
47 
48 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
49 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
50 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
51 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
52 
53 MODULE_FIRMWARE("radeon/verde_pfp.bin");
54 MODULE_FIRMWARE("radeon/verde_me.bin");
55 MODULE_FIRMWARE("radeon/verde_ce.bin");
56 MODULE_FIRMWARE("radeon/verde_rlc.bin");
57 
58 MODULE_FIRMWARE("radeon/oland_pfp.bin");
59 MODULE_FIRMWARE("radeon/oland_me.bin");
60 MODULE_FIRMWARE("radeon/oland_ce.bin");
61 MODULE_FIRMWARE("radeon/oland_rlc.bin");
62 
63 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
64 MODULE_FIRMWARE("radeon/hainan_me.bin");
65 MODULE_FIRMWARE("radeon/hainan_ce.bin");
66 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
67 
68 static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev);
69 static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
70 //static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev);
71 static void gfx_v6_0_init_pg(struct amdgpu_device *adev);
72 
73 
74 static const u32 verde_rlc_save_restore_register_list[] =
75 {
76 	(0x8000 << 16) | (0x98f4 >> 2),
77 	0x00000000,
78 	(0x8040 << 16) | (0x98f4 >> 2),
79 	0x00000000,
80 	(0x8000 << 16) | (0xe80 >> 2),
81 	0x00000000,
82 	(0x8040 << 16) | (0xe80 >> 2),
83 	0x00000000,
84 	(0x8000 << 16) | (0x89bc >> 2),
85 	0x00000000,
86 	(0x8040 << 16) | (0x89bc >> 2),
87 	0x00000000,
88 	(0x8000 << 16) | (0x8c1c >> 2),
89 	0x00000000,
90 	(0x8040 << 16) | (0x8c1c >> 2),
91 	0x00000000,
92 	(0x9c00 << 16) | (0x98f0 >> 2),
93 	0x00000000,
94 	(0x9c00 << 16) | (0xe7c >> 2),
95 	0x00000000,
96 	(0x8000 << 16) | (0x9148 >> 2),
97 	0x00000000,
98 	(0x8040 << 16) | (0x9148 >> 2),
99 	0x00000000,
100 	(0x9c00 << 16) | (0x9150 >> 2),
101 	0x00000000,
102 	(0x9c00 << 16) | (0x897c >> 2),
103 	0x00000000,
104 	(0x9c00 << 16) | (0x8d8c >> 2),
105 	0x00000000,
106 	(0x9c00 << 16) | (0xac54 >> 2),
107 	0X00000000,
108 	0x3,
109 	(0x9c00 << 16) | (0x98f8 >> 2),
110 	0x00000000,
111 	(0x9c00 << 16) | (0x9910 >> 2),
112 	0x00000000,
113 	(0x9c00 << 16) | (0x9914 >> 2),
114 	0x00000000,
115 	(0x9c00 << 16) | (0x9918 >> 2),
116 	0x00000000,
117 	(0x9c00 << 16) | (0x991c >> 2),
118 	0x00000000,
119 	(0x9c00 << 16) | (0x9920 >> 2),
120 	0x00000000,
121 	(0x9c00 << 16) | (0x9924 >> 2),
122 	0x00000000,
123 	(0x9c00 << 16) | (0x9928 >> 2),
124 	0x00000000,
125 	(0x9c00 << 16) | (0x992c >> 2),
126 	0x00000000,
127 	(0x9c00 << 16) | (0x9930 >> 2),
128 	0x00000000,
129 	(0x9c00 << 16) | (0x9934 >> 2),
130 	0x00000000,
131 	(0x9c00 << 16) | (0x9938 >> 2),
132 	0x00000000,
133 	(0x9c00 << 16) | (0x993c >> 2),
134 	0x00000000,
135 	(0x9c00 << 16) | (0x9940 >> 2),
136 	0x00000000,
137 	(0x9c00 << 16) | (0x9944 >> 2),
138 	0x00000000,
139 	(0x9c00 << 16) | (0x9948 >> 2),
140 	0x00000000,
141 	(0x9c00 << 16) | (0x994c >> 2),
142 	0x00000000,
143 	(0x9c00 << 16) | (0x9950 >> 2),
144 	0x00000000,
145 	(0x9c00 << 16) | (0x9954 >> 2),
146 	0x00000000,
147 	(0x9c00 << 16) | (0x9958 >> 2),
148 	0x00000000,
149 	(0x9c00 << 16) | (0x995c >> 2),
150 	0x00000000,
151 	(0x9c00 << 16) | (0x9960 >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x9964 >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0x9968 >> 2),
156 	0x00000000,
157 	(0x9c00 << 16) | (0x996c >> 2),
158 	0x00000000,
159 	(0x9c00 << 16) | (0x9970 >> 2),
160 	0x00000000,
161 	(0x9c00 << 16) | (0x9974 >> 2),
162 	0x00000000,
163 	(0x9c00 << 16) | (0x9978 >> 2),
164 	0x00000000,
165 	(0x9c00 << 16) | (0x997c >> 2),
166 	0x00000000,
167 	(0x9c00 << 16) | (0x9980 >> 2),
168 	0x00000000,
169 	(0x9c00 << 16) | (0x9984 >> 2),
170 	0x00000000,
171 	(0x9c00 << 16) | (0x9988 >> 2),
172 	0x00000000,
173 	(0x9c00 << 16) | (0x998c >> 2),
174 	0x00000000,
175 	(0x9c00 << 16) | (0x8c00 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x8c14 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x8c04 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x8c08 >> 2),
182 	0x00000000,
183 	(0x8000 << 16) | (0x9b7c >> 2),
184 	0x00000000,
185 	(0x8040 << 16) | (0x9b7c >> 2),
186 	0x00000000,
187 	(0x8000 << 16) | (0xe84 >> 2),
188 	0x00000000,
189 	(0x8040 << 16) | (0xe84 >> 2),
190 	0x00000000,
191 	(0x8000 << 16) | (0x89c0 >> 2),
192 	0x00000000,
193 	(0x8040 << 16) | (0x89c0 >> 2),
194 	0x00000000,
195 	(0x8000 << 16) | (0x914c >> 2),
196 	0x00000000,
197 	(0x8040 << 16) | (0x914c >> 2),
198 	0x00000000,
199 	(0x8000 << 16) | (0x8c20 >> 2),
200 	0x00000000,
201 	(0x8040 << 16) | (0x8c20 >> 2),
202 	0x00000000,
203 	(0x8000 << 16) | (0x9354 >> 2),
204 	0x00000000,
205 	(0x8040 << 16) | (0x9354 >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x9060 >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x9364 >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9100 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x913c >> 2),
214 	0x00000000,
215 	(0x8000 << 16) | (0x90e0 >> 2),
216 	0x00000000,
217 	(0x8000 << 16) | (0x90e4 >> 2),
218 	0x00000000,
219 	(0x8000 << 16) | (0x90e8 >> 2),
220 	0x00000000,
221 	(0x8040 << 16) | (0x90e0 >> 2),
222 	0x00000000,
223 	(0x8040 << 16) | (0x90e4 >> 2),
224 	0x00000000,
225 	(0x8040 << 16) | (0x90e8 >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x8bcc >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x8b24 >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x88c4 >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x8e50 >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x8c0c >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x8e58 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x8e5c >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x9508 >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x950c >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x9494 >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0xac0c >> 2),
248 	0x00000000,
249 	(0x9c00 << 16) | (0xac10 >> 2),
250 	0x00000000,
251 	(0x9c00 << 16) | (0xac14 >> 2),
252 	0x00000000,
253 	(0x9c00 << 16) | (0xae00 >> 2),
254 	0x00000000,
255 	(0x9c00 << 16) | (0xac08 >> 2),
256 	0x00000000,
257 	(0x9c00 << 16) | (0x88d4 >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0x88c8 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0x88cc >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0x89b0 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0x8b10 >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x8a14 >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x9830 >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0x9834 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x9838 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x9a10 >> 2),
276 	0x00000000,
277 	(0x8000 << 16) | (0x9870 >> 2),
278 	0x00000000,
279 	(0x8000 << 16) | (0x9874 >> 2),
280 	0x00000000,
281 	(0x8001 << 16) | (0x9870 >> 2),
282 	0x00000000,
283 	(0x8001 << 16) | (0x9874 >> 2),
284 	0x00000000,
285 	(0x8040 << 16) | (0x9870 >> 2),
286 	0x00000000,
287 	(0x8040 << 16) | (0x9874 >> 2),
288 	0x00000000,
289 	(0x8041 << 16) | (0x9870 >> 2),
290 	0x00000000,
291 	(0x8041 << 16) | (0x9874 >> 2),
292 	0x00000000,
293 	0x00000000
294 };
295 
296 static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
297 {
298 	const char *chip_name;
299 	char fw_name[30];
300 	int err;
301 	const struct gfx_firmware_header_v1_0 *cp_hdr;
302 	const struct rlc_firmware_header_v1_0 *rlc_hdr;
303 
304 	DRM_DEBUG("\n");
305 
306 	switch (adev->asic_type) {
307 	case CHIP_TAHITI:
308 		chip_name = "tahiti";
309 		break;
310 	case CHIP_PITCAIRN:
311 		chip_name = "pitcairn";
312 		break;
313 	case CHIP_VERDE:
314 		chip_name = "verde";
315 		break;
316 	case CHIP_OLAND:
317 		chip_name = "oland";
318 		break;
319 	case CHIP_HAINAN:
320 		chip_name = "hainan";
321 		break;
322 	default: BUG();
323 	}
324 
325 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
326 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
327 	if (err)
328 		goto out;
329 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
330 	if (err)
331 		goto out;
332 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
333 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
334 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
335 
336 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
337 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
338 	if (err)
339 		goto out;
340 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
341 	if (err)
342 		goto out;
343 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
344 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
345 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
346 
347 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
348 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
349 	if (err)
350 		goto out;
351 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
352 	if (err)
353 		goto out;
354 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
355 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
356 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
357 
358 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
359 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
360 	if (err)
361 		goto out;
362 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
363 	rlc_hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
364 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
365 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
366 
367 out:
368 	if (err) {
369 		printk(KERN_ERR
370 		       "gfx6: Failed to load firmware \"%s\"\n",
371 		       fw_name);
372 		release_firmware(adev->gfx.pfp_fw);
373 		adev->gfx.pfp_fw = NULL;
374 		release_firmware(adev->gfx.me_fw);
375 		adev->gfx.me_fw = NULL;
376 		release_firmware(adev->gfx.ce_fw);
377 		adev->gfx.ce_fw = NULL;
378 		release_firmware(adev->gfx.rlc_fw);
379 		adev->gfx.rlc_fw = NULL;
380 	}
381 	return err;
382 }
383 
384 static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
385 {
386 	const u32 num_tile_mode_states = 32;
387 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
388 
389 	switch (adev->gfx.config.mem_row_size_in_kb) {
390 	case 1:
391 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
392 		break;
393 	case 2:
394 	default:
395 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
396 		break;
397 	case 4:
398 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
399 		break;
400 	}
401 
402 	if (adev->asic_type == CHIP_VERDE ||
403 		adev->asic_type == CHIP_OLAND ||
404 		adev->asic_type == CHIP_HAINAN) {
405 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
406 			switch (reg_offset) {
407 			case 0:
408 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
409 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
410 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
411 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
412 						 NUM_BANKS(ADDR_SURF_16_BANK) |
413 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
414 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
415 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
416 				break;
417 			case 1:
418 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
419 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
420 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
421 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
422 						 NUM_BANKS(ADDR_SURF_16_BANK) |
423 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
424 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
425 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
426 				break;
427 			case 2:
428 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
429 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
430 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
431 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
432 						 NUM_BANKS(ADDR_SURF_16_BANK) |
433 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
434 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
435 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
436 				break;
437 			case 3:
438 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
439 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
440 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
441 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
442 						 NUM_BANKS(ADDR_SURF_16_BANK) |
443 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
444 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
445 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
446 				break;
447 			case 4:
448 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
449 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
450 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
451 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
452 						 NUM_BANKS(ADDR_SURF_16_BANK) |
453 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
454 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
455 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
456 				break;
457 			case 5:
458 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
459 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
460 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
461 						 TILE_SPLIT(split_equal_to_row_size) |
462 						 NUM_BANKS(ADDR_SURF_16_BANK) |
463 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
464 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
465 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
466 				break;
467 			case 6:
468 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
469 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
470 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
471 						 TILE_SPLIT(split_equal_to_row_size) |
472 						 NUM_BANKS(ADDR_SURF_16_BANK) |
473 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
474 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
475 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
476 				break;
477 			case 7:
478 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
479 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
480 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
481 						 TILE_SPLIT(split_equal_to_row_size) |
482 						 NUM_BANKS(ADDR_SURF_16_BANK) |
483 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
484 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
485 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
486 				break;
487 			case 8:
488 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
489 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
490 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
491 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
492 						 NUM_BANKS(ADDR_SURF_16_BANK) |
493 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
494 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
495 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
496 				break;
497 			case 9:
498 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
499 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
500 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
501 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
502 						 NUM_BANKS(ADDR_SURF_16_BANK) |
503 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
506 				break;
507 			case 10:
508 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
509 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
510 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
511 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
512 						 NUM_BANKS(ADDR_SURF_16_BANK) |
513 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
514 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
515 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
516 				break;
517 			case 11:
518 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
519 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
520 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
521 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
522 						 NUM_BANKS(ADDR_SURF_16_BANK) |
523 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
524 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
525 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
526 				break;
527 			case 12:
528 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
529 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
530 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
531 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
532 						 NUM_BANKS(ADDR_SURF_16_BANK) |
533 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
534 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
535 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
536 				break;
537 			case 13:
538 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
539 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
540 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
541 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
542 						 NUM_BANKS(ADDR_SURF_16_BANK) |
543 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
544 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
545 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
546 				break;
547 			case 14:
548 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
549 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
550 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
551 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
552 						 NUM_BANKS(ADDR_SURF_16_BANK) |
553 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
554 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
555 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
556 				break;
557 			case 15:
558 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
559 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
560 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
561 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
562 						 NUM_BANKS(ADDR_SURF_16_BANK) |
563 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
564 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
565 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
566 				break;
567 			case 16:
568 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
569 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
570 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
571 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
572 						 NUM_BANKS(ADDR_SURF_16_BANK) |
573 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
574 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
575 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
576 				break;
577 			case 17:
578 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
579 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
580 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
581 						 TILE_SPLIT(split_equal_to_row_size) |
582 						 NUM_BANKS(ADDR_SURF_16_BANK) |
583 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
584 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
585 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
586 				break;
587 			case 21:
588 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
589 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
590 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
591 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
592 						 NUM_BANKS(ADDR_SURF_16_BANK) |
593 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
594 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
595 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
596 				break;
597 			case 22:
598 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
599 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
600 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
601 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
602 						 NUM_BANKS(ADDR_SURF_16_BANK) |
603 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
604 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
605 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
606 				break;
607 			case 23:
608 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
609 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
610 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
611 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
612 						 NUM_BANKS(ADDR_SURF_16_BANK) |
613 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
614 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
615 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
616 				break;
617 			case 24:
618 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
619 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
620 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
621 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
622 						 NUM_BANKS(ADDR_SURF_16_BANK) |
623 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
624 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
625 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
626 				break;
627 			case 25:
628 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
629 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
630 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
631 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
632 						 NUM_BANKS(ADDR_SURF_8_BANK) |
633 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
634 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
635 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
636 				break;
637 			default:
638 				gb_tile_moden = 0;
639 				break;
640 			}
641 			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
642 			WREG32(GB_TILE_MODE0 + reg_offset, gb_tile_moden);
643 		}
644 	} else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) {
645 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
646 			switch (reg_offset) {
647 			case 0:  /* non-AA compressed depth or any compressed stencil */
648 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
649 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
650 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
651 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
652 						 NUM_BANKS(ADDR_SURF_16_BANK) |
653 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
654 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
655 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
656 				break;
657 			case 1:  /* 2xAA/4xAA compressed depth only */
658 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
659 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
660 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
661 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
662 						 NUM_BANKS(ADDR_SURF_16_BANK) |
663 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
664 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
665 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
666 				break;
667 			case 2:  /* 8xAA compressed depth only */
668 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
669 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
670 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
671 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
672 						 NUM_BANKS(ADDR_SURF_16_BANK) |
673 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
674 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
675 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
676 				break;
677 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
678 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
679 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
680 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
681 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
682 						 NUM_BANKS(ADDR_SURF_16_BANK) |
683 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
684 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
685 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
686 				break;
687 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
688 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
689 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
690 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
691 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
692 						 NUM_BANKS(ADDR_SURF_16_BANK) |
693 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
694 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
695 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
696 				break;
697 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
698 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
699 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
700 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
701 						 TILE_SPLIT(split_equal_to_row_size) |
702 						 NUM_BANKS(ADDR_SURF_16_BANK) |
703 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
704 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
705 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
706 				break;
707 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
708 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
709 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
710 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
711 						 TILE_SPLIT(split_equal_to_row_size) |
712 						 NUM_BANKS(ADDR_SURF_16_BANK) |
713 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
714 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
715 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
716 				break;
717 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
718 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
719 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
720 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
721 						 TILE_SPLIT(split_equal_to_row_size) |
722 						 NUM_BANKS(ADDR_SURF_16_BANK) |
723 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
724 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
725 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
726 				break;
727 			case 8:  /* 1D and 1D Array Surfaces */
728 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
729 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
730 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
731 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
732 						 NUM_BANKS(ADDR_SURF_16_BANK) |
733 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
734 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
735 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
736 				break;
737 			case 9:  /* Displayable maps. */
738 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
739 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
740 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
741 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
742 						 NUM_BANKS(ADDR_SURF_16_BANK) |
743 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
744 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
745 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
746 				break;
747 			case 10:  /* Display 8bpp. */
748 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
749 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
750 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
751 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
752 						 NUM_BANKS(ADDR_SURF_16_BANK) |
753 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
754 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
755 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
756 				break;
757 			case 11:  /* Display 16bpp. */
758 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
759 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
760 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
761 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
762 						 NUM_BANKS(ADDR_SURF_16_BANK) |
763 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
764 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
765 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
766 				break;
767 			case 12:  /* Display 32bpp. */
768 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
769 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
770 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
771 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
772 						 NUM_BANKS(ADDR_SURF_16_BANK) |
773 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
774 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
775 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
776 				break;
777 			case 13:  /* Thin. */
778 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
779 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
780 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
781 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
782 						 NUM_BANKS(ADDR_SURF_16_BANK) |
783 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
784 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
785 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
786 				break;
787 			case 14:  /* Thin 8 bpp. */
788 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
789 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
790 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
791 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
792 						 NUM_BANKS(ADDR_SURF_16_BANK) |
793 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
794 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
795 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
796 				break;
797 			case 15:  /* Thin 16 bpp. */
798 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
799 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
800 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
801 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
802 						 NUM_BANKS(ADDR_SURF_16_BANK) |
803 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
804 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
805 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
806 				break;
807 			case 16:  /* Thin 32 bpp. */
808 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
809 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
810 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
811 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
812 						 NUM_BANKS(ADDR_SURF_16_BANK) |
813 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
814 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
815 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
816 				break;
817 			case 17:  /* Thin 64 bpp. */
818 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
819 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
820 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
821 						 TILE_SPLIT(split_equal_to_row_size) |
822 						 NUM_BANKS(ADDR_SURF_16_BANK) |
823 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
824 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
825 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
826 				break;
827 			case 21:  /* 8 bpp PRT. */
828 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
829 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
830 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
831 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
832 						 NUM_BANKS(ADDR_SURF_16_BANK) |
833 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
834 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
835 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
836 				break;
837 			case 22:  /* 16 bpp PRT */
838 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
839 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
840 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
841 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
842 						 NUM_BANKS(ADDR_SURF_16_BANK) |
843 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
844 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
845 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
846 				break;
847 			case 23:  /* 32 bpp PRT */
848 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
849 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
850 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
851 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
852 						 NUM_BANKS(ADDR_SURF_16_BANK) |
853 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
854 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
855 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
856 				break;
857 			case 24:  /* 64 bpp PRT */
858 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
859 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
860 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
861 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
862 						 NUM_BANKS(ADDR_SURF_16_BANK) |
863 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
864 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
865 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
866 				break;
867 			case 25:  /* 128 bpp PRT */
868 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
869 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
870 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
871 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
872 						 NUM_BANKS(ADDR_SURF_8_BANK) |
873 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
874 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
875 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
876 				break;
877 			default:
878 				gb_tile_moden = 0;
879 				break;
880 			}
881 			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
882 			WREG32(GB_TILE_MODE0 + reg_offset, gb_tile_moden);
883 		}
884 	} else{
885 
886 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
887 	}
888 
889 }
890 
891 static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
892 				  u32 sh_num, u32 instance)
893 {
894 	u32 data;
895 
896 	if (instance == 0xffffffff)
897 		data = INSTANCE_BROADCAST_WRITES;
898 	else
899 		data = INSTANCE_INDEX(instance);
900 
901 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
902 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
903 	else if (se_num == 0xffffffff)
904 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
905 	else if (sh_num == 0xffffffff)
906 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
907 	else
908 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
909 	WREG32(GRBM_GFX_INDEX, data);
910 }
911 
912 static u32 gfx_v6_0_create_bitmask(u32 bit_width)
913 {
914 	return (u32)(((u64)1 << bit_width) - 1);
915 }
916 
917 static u32 gfx_v6_0_get_rb_disabled(struct amdgpu_device *adev,
918 				    u32 max_rb_num_per_se,
919 				    u32 sh_per_se)
920 {
921 	u32 data, mask;
922 
923 	data = RREG32(CC_RB_BACKEND_DISABLE);
924 	data &= BACKEND_DISABLE_MASK;
925 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
926 
927 	data >>= BACKEND_DISABLE_SHIFT;
928 
929 	mask = gfx_v6_0_create_bitmask(max_rb_num_per_se / sh_per_se);
930 
931 	return data & mask;
932 }
933 
934 static void gfx_v6_0_raster_config(struct amdgpu_device *adev, u32 *rconf)
935 {
936 	switch (adev->asic_type) {
937 	case CHIP_TAHITI:
938 	case CHIP_PITCAIRN:
939 		*rconf |= RB_XSEL2(2) | RB_XSEL | PKR_MAP(2) | PKR_YSEL(1) |
940 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(2);
941 		break;
942 	case CHIP_VERDE:
943 		*rconf |= RB_XSEL | PKR_MAP(2) | PKR_YSEL(1);
944 		break;
945 	case CHIP_OLAND:
946 		*rconf |= RB_YSEL;
947 		break;
948 	case CHIP_HAINAN:
949 		*rconf |= 0x0;
950 		break;
951 	default:
952 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
953 		break;
954 	}
955 }
956 
957 static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
958 						    u32 raster_config, unsigned rb_mask,
959 						    unsigned num_rb)
960 {
961 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
962 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
963 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
964 	unsigned rb_per_se = num_rb / num_se;
965 	unsigned se_mask[4];
966 	unsigned se;
967 
968 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
969 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
970 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
971 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
972 
973 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
974 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
975 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
976 
977 	for (se = 0; se < num_se; se++) {
978 		unsigned raster_config_se = raster_config;
979 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
980 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
981 		int idx = (se / 2) * 2;
982 
983 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
984 			raster_config_se &= ~SE_MAP_MASK;
985 
986 			if (!se_mask[idx]) {
987 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
988 			} else {
989 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
990 			}
991 		}
992 
993 		pkr0_mask &= rb_mask;
994 		pkr1_mask &= rb_mask;
995 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
996 			raster_config_se &= ~PKR_MAP_MASK;
997 
998 			if (!pkr0_mask) {
999 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
1000 			} else {
1001 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
1002 			}
1003 		}
1004 
1005 		if (rb_per_se >= 2) {
1006 			unsigned rb0_mask = 1 << (se * rb_per_se);
1007 			unsigned rb1_mask = rb0_mask << 1;
1008 
1009 			rb0_mask &= rb_mask;
1010 			rb1_mask &= rb_mask;
1011 			if (!rb0_mask || !rb1_mask) {
1012 				raster_config_se &= ~RB_MAP_PKR0_MASK;
1013 
1014 				if (!rb0_mask) {
1015 					raster_config_se |=
1016 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
1017 				} else {
1018 					raster_config_se |=
1019 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
1020 				}
1021 			}
1022 
1023 			if (rb_per_se > 2) {
1024 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
1025 				rb1_mask = rb0_mask << 1;
1026 				rb0_mask &= rb_mask;
1027 				rb1_mask &= rb_mask;
1028 				if (!rb0_mask || !rb1_mask) {
1029 					raster_config_se &= ~RB_MAP_PKR1_MASK;
1030 
1031 					if (!rb0_mask) {
1032 						raster_config_se |=
1033 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
1034 					} else {
1035 						raster_config_se |=
1036 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
1037 					}
1038 				}
1039 			}
1040 		}
1041 
1042 		/* GRBM_GFX_INDEX has a different offset on SI */
1043 		gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
1044 		WREG32(PA_SC_RASTER_CONFIG, raster_config_se);
1045 	}
1046 
1047 	/* GRBM_GFX_INDEX has a different offset on SI */
1048 	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1049 }
1050 
1051 static void gfx_v6_0_setup_rb(struct amdgpu_device *adev,
1052 			      u32 se_num, u32 sh_per_se,
1053 			      u32 max_rb_num_per_se)
1054 {
1055 	int i, j;
1056 	u32 data, mask;
1057 	u32 disabled_rbs = 0;
1058 	u32 enabled_rbs = 0;
1059 	unsigned num_rb_pipes;
1060 
1061 	mutex_lock(&adev->grbm_idx_mutex);
1062 	for (i = 0; i < se_num; i++) {
1063 		for (j = 0; j < sh_per_se; j++) {
1064 			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
1065 			data = gfx_v6_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se);
1066 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
1067 		}
1068 	}
1069 	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1070 	mutex_unlock(&adev->grbm_idx_mutex);
1071 
1072 	mask = 1;
1073 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
1074 		if (!(disabled_rbs & mask))
1075 			enabled_rbs |= mask;
1076 		mask <<= 1;
1077 	}
1078 
1079 	adev->gfx.config.backend_enable_mask = enabled_rbs;
1080 	adev->gfx.config.num_rbs = hweight32(enabled_rbs);
1081 
1082 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
1083 			     adev->gfx.config.max_shader_engines, 16);
1084 
1085 	mutex_lock(&adev->grbm_idx_mutex);
1086 	for (i = 0; i < se_num; i++) {
1087 		gfx_v6_0_select_se_sh(adev, i, 0xffffffff, 0xffffffff);
1088 		data = 0;
1089 		for (j = 0; j < sh_per_se; j++) {
1090 			switch (enabled_rbs & 3) {
1091 			case 1:
1092 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1093 				break;
1094 			case 2:
1095 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1096 				break;
1097 			case 3:
1098 			default:
1099 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1100 				break;
1101 			}
1102 			enabled_rbs >>= 2;
1103 		}
1104 		gfx_v6_0_raster_config(adev, &data);
1105 
1106 		if (!adev->gfx.config.backend_enable_mask ||
1107 				adev->gfx.config.num_rbs >= num_rb_pipes)
1108 			WREG32(PA_SC_RASTER_CONFIG, data);
1109 		else
1110 			gfx_v6_0_write_harvested_raster_configs(adev, data,
1111 								adev->gfx.config.backend_enable_mask,
1112 								num_rb_pipes);
1113 	}
1114 	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1115 	mutex_unlock(&adev->grbm_idx_mutex);
1116 }
1117 /*
1118 static void gmc_v6_0_init_compute_vmid(struct amdgpu_device *adev)
1119 {
1120 }
1121 */
1122 
1123 static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev, u32 cu_per_sh)
1124 {
1125 	u32 data, mask;
1126 
1127 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1128 	data &= INACTIVE_CUS_MASK;
1129 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1130 
1131 	data >>= INACTIVE_CUS_SHIFT;
1132 
1133 	mask = gfx_v6_0_create_bitmask(cu_per_sh);
1134 
1135 	return ~data & mask;
1136 }
1137 
1138 
1139 static void gfx_v6_0_setup_spi(struct amdgpu_device *adev,
1140 			 u32 se_num, u32 sh_per_se,
1141 			 u32 cu_per_sh)
1142 {
1143 	int i, j, k;
1144 	u32 data, mask;
1145 	u32 active_cu = 0;
1146 
1147 	mutex_lock(&adev->grbm_idx_mutex);
1148 	for (i = 0; i < se_num; i++) {
1149 		for (j = 0; j < sh_per_se; j++) {
1150 			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
1151 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
1152 			active_cu = gfx_v6_0_get_cu_enabled(adev, cu_per_sh);
1153 
1154 			mask = 1;
1155 			for (k = 0; k < 16; k++) {
1156 				mask <<= k;
1157 				if (active_cu & mask) {
1158 					data &= ~mask;
1159 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
1160 					break;
1161 				}
1162 			}
1163 		}
1164 	}
1165 	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1166 	mutex_unlock(&adev->grbm_idx_mutex);
1167 }
1168 
1169 static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
1170 {
1171 	u32 gb_addr_config = 0;
1172 	u32 mc_shared_chmap, mc_arb_ramcfg;
1173 	u32 sx_debug_1;
1174 	u32 hdp_host_path_cntl;
1175 	u32 tmp;
1176 
1177 	switch (adev->asic_type) {
1178 	case CHIP_TAHITI:
1179 		adev->gfx.config.max_shader_engines = 2;
1180 		adev->gfx.config.max_tile_pipes = 12;
1181 		adev->gfx.config.max_cu_per_sh = 8;
1182 		adev->gfx.config.max_sh_per_se = 2;
1183 		adev->gfx.config.max_backends_per_se = 4;
1184 		adev->gfx.config.max_texture_channel_caches = 12;
1185 		adev->gfx.config.max_gprs = 256;
1186 		adev->gfx.config.max_gs_threads = 32;
1187 		adev->gfx.config.max_hw_contexts = 8;
1188 
1189 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1190 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1191 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1192 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1193 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1194 		break;
1195 	case CHIP_PITCAIRN:
1196 		adev->gfx.config.max_shader_engines = 2;
1197 		adev->gfx.config.max_tile_pipes = 8;
1198 		adev->gfx.config.max_cu_per_sh = 5;
1199 		adev->gfx.config.max_sh_per_se = 2;
1200 		adev->gfx.config.max_backends_per_se = 4;
1201 		adev->gfx.config.max_texture_channel_caches = 8;
1202 		adev->gfx.config.max_gprs = 256;
1203 		adev->gfx.config.max_gs_threads = 32;
1204 		adev->gfx.config.max_hw_contexts = 8;
1205 
1206 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1207 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1208 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1209 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1210 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1211 		break;
1212 
1213 	case CHIP_VERDE:
1214 		adev->gfx.config.max_shader_engines = 1;
1215 		adev->gfx.config.max_tile_pipes = 4;
1216 		adev->gfx.config.max_cu_per_sh = 5;
1217 		adev->gfx.config.max_sh_per_se = 2;
1218 		adev->gfx.config.max_backends_per_se = 4;
1219 		adev->gfx.config.max_texture_channel_caches = 4;
1220 		adev->gfx.config.max_gprs = 256;
1221 		adev->gfx.config.max_gs_threads = 32;
1222 		adev->gfx.config.max_hw_contexts = 8;
1223 
1224 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1225 		adev->gfx.config.sc_prim_fifo_size_backend = 0x40;
1226 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1227 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1228 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
1229 		break;
1230 	case CHIP_OLAND:
1231 		adev->gfx.config.max_shader_engines = 1;
1232 		adev->gfx.config.max_tile_pipes = 4;
1233 		adev->gfx.config.max_cu_per_sh = 6;
1234 		adev->gfx.config.max_sh_per_se = 1;
1235 		adev->gfx.config.max_backends_per_se = 2;
1236 		adev->gfx.config.max_texture_channel_caches = 4;
1237 		adev->gfx.config.max_gprs = 256;
1238 		adev->gfx.config.max_gs_threads = 16;
1239 		adev->gfx.config.max_hw_contexts = 8;
1240 
1241 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1242 		adev->gfx.config.sc_prim_fifo_size_backend = 0x40;
1243 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1244 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1245 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
1246 		break;
1247 	case CHIP_HAINAN:
1248 		adev->gfx.config.max_shader_engines = 1;
1249 		adev->gfx.config.max_tile_pipes = 4;
1250 		adev->gfx.config.max_cu_per_sh = 5;
1251 		adev->gfx.config.max_sh_per_se = 1;
1252 		adev->gfx.config.max_backends_per_se = 1;
1253 		adev->gfx.config.max_texture_channel_caches = 2;
1254 		adev->gfx.config.max_gprs = 256;
1255 		adev->gfx.config.max_gs_threads = 16;
1256 		adev->gfx.config.max_hw_contexts = 8;
1257 
1258 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1259 		adev->gfx.config.sc_prim_fifo_size_backend = 0x40;
1260 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1261 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1262 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
1263 		break;
1264 	default:
1265 		BUG();
1266 		break;
1267 	}
1268 
1269 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1270 	WREG32(SRBM_INT_CNTL, 1);
1271 	WREG32(SRBM_INT_ACK, 1);
1272 
1273 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1274 
1275 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1276 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1277 
1278 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1279 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1280 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1281 	adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1282 	if (adev->gfx.config.mem_row_size_in_kb > 4)
1283 		adev->gfx.config.mem_row_size_in_kb = 4;
1284 	adev->gfx.config.shader_engine_tile_size = 32;
1285 	adev->gfx.config.num_gpus = 1;
1286 	adev->gfx.config.multi_gpu_tile_size = 64;
1287 
1288 	gb_addr_config &= ~ROW_SIZE_MASK;
1289 	switch (adev->gfx.config.mem_row_size_in_kb) {
1290 	case 1:
1291 	default:
1292 		gb_addr_config |= ROW_SIZE(0);
1293 		break;
1294 	case 2:
1295 		gb_addr_config |= ROW_SIZE(1);
1296 		break;
1297 	case 4:
1298 		gb_addr_config |= ROW_SIZE(2);
1299 		break;
1300 	}
1301 	adev->gfx.config.gb_addr_config = gb_addr_config;
1302 
1303 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1304 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1305 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
1306 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1307 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
1308 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
1309 #if 0
1310 	if (adev->has_uvd) {
1311 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1312 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1313 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1314 	}
1315 #endif
1316 	gfx_v6_0_tiling_mode_table_init(adev);
1317 
1318 	gfx_v6_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
1319 		    adev->gfx.config.max_sh_per_se,
1320 		    adev->gfx.config.max_backends_per_se);
1321 
1322 	gfx_v6_0_setup_spi(adev, adev->gfx.config.max_shader_engines,
1323 		     adev->gfx.config.max_sh_per_se,
1324 		     adev->gfx.config.max_cu_per_sh);
1325 
1326 	gfx_v6_0_get_cu_info(adev);
1327 
1328 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
1329 				     ROQ_IB2_START(0x2b)));
1330 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1331 
1332 	sx_debug_1 = RREG32(SX_DEBUG_1);
1333 	WREG32(SX_DEBUG_1, sx_debug_1);
1334 
1335 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1336 
1337 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(adev->gfx.config.sc_prim_fifo_size_frontend) |
1338 				 SC_BACKEND_PRIM_FIFO_SIZE(adev->gfx.config.sc_prim_fifo_size_backend) |
1339 				 SC_HIZ_TILE_FIFO_SIZE(adev->gfx.config.sc_hiz_tile_fifo_size) |
1340 				 SC_EARLYZ_TILE_FIFO_SIZE(adev->gfx.config.sc_earlyz_tile_fifo_size)));
1341 
1342 	WREG32(VGT_NUM_INSTANCES, 1);
1343 	WREG32(CP_PERFMON_CNTL, 0);
1344 	WREG32(SQ_CONFIG, 0);
1345 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1346 					  FORCE_EOV_MAX_REZ_CNT(255)));
1347 
1348 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1349 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
1350 
1351 	WREG32(VGT_GS_VERTEX_REUSE, 16);
1352 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1353 
1354 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
1355 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
1356 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
1357 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
1358 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
1359 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
1360 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
1361 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
1362 
1363 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1364 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1365 
1366 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1367 
1368 	udelay(50);
1369 }
1370 
1371 
1372 static void gfx_v6_0_scratch_init(struct amdgpu_device *adev)
1373 {
1374 	int i;
1375 
1376 	adev->gfx.scratch.num_reg = 7;
1377 	adev->gfx.scratch.reg_base = SCRATCH_REG0;
1378 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
1379 		adev->gfx.scratch.free[i] = true;
1380 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
1381 	}
1382 }
1383 
1384 static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
1385 {
1386 	struct amdgpu_device *adev = ring->adev;
1387 	uint32_t scratch;
1388 	uint32_t tmp = 0;
1389 	unsigned i;
1390 	int r;
1391 
1392 	r = amdgpu_gfx_scratch_get(adev, &scratch);
1393 	if (r) {
1394 		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
1395 		return r;
1396 	}
1397 	WREG32(scratch, 0xCAFEDEAD);
1398 
1399 	r = amdgpu_ring_alloc(ring, 3);
1400 	if (r) {
1401 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
1402 		amdgpu_gfx_scratch_free(adev, scratch);
1403 		return r;
1404 	}
1405 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1406 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START));
1407 	amdgpu_ring_write(ring, 0xDEADBEEF);
1408 	amdgpu_ring_commit(ring);
1409 
1410 	for (i = 0; i < adev->usec_timeout; i++) {
1411 		tmp = RREG32(scratch);
1412 		if (tmp == 0xDEADBEEF)
1413 			break;
1414 		DRM_UDELAY(1);
1415 	}
1416 	if (i < adev->usec_timeout) {
1417 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1418 	} else {
1419 		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1420 			  ring->idx, scratch, tmp);
1421 		r = -EINVAL;
1422 	}
1423 	amdgpu_gfx_scratch_free(adev, scratch);
1424 	return r;
1425 }
1426 
1427 static void gfx_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
1428 {
1429 	/* flush hdp cache */
1430 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1431 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
1432 				 WRITE_DATA_DST_SEL(0)));
1433 	amdgpu_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL);
1434 	amdgpu_ring_write(ring, 0);
1435 	amdgpu_ring_write(ring, 0x1);
1436 }
1437 
1438 /**
1439  * gfx_v6_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
1440  *
1441  * @adev: amdgpu_device pointer
1442  * @ridx: amdgpu ring index
1443  *
1444  * Emits an hdp invalidate on the cp.
1445  */
1446 static void gfx_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
1447 {
1448 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1449 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
1450 				 WRITE_DATA_DST_SEL(0)));
1451 	amdgpu_ring_write(ring, HDP_DEBUG0);
1452 	amdgpu_ring_write(ring, 0);
1453 	amdgpu_ring_write(ring, 0x1);
1454 }
1455 
1456 static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
1457 				     u64 seq, unsigned flags)
1458 {
1459 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
1460 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
1461 	/* flush read cache over gart */
1462 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1463 	amdgpu_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START));
1464 	amdgpu_ring_write(ring, 0);
1465 	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1466 	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1467 			  PACKET3_TC_ACTION_ENA |
1468 			  PACKET3_SH_KCACHE_ACTION_ENA |
1469 			  PACKET3_SH_ICACHE_ACTION_ENA);
1470 	amdgpu_ring_write(ring, 0xFFFFFFFF);
1471 	amdgpu_ring_write(ring, 0);
1472 	amdgpu_ring_write(ring, 10); /* poll interval */
1473 	/* EVENT_WRITE_EOP - flush caches, send int */
1474 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1475 	amdgpu_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
1476 	amdgpu_ring_write(ring, addr & 0xfffffffc);
1477 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
1478 				DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
1479 	amdgpu_ring_write(ring, lower_32_bits(seq));
1480 	amdgpu_ring_write(ring, upper_32_bits(seq));
1481 }
1482 
1483 static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
1484 				  struct amdgpu_ib *ib,
1485 				  unsigned vm_id, bool ctx_switch)
1486 {
1487 	u32 header, control = 0;
1488 
1489 	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
1490 	if (ctx_switch) {
1491 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1492 		amdgpu_ring_write(ring, 0);
1493 	}
1494 
1495 	if (ib->flags & AMDGPU_IB_FLAG_CE)
1496 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1497 	else
1498 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1499 
1500 	control |= ib->length_dw | (vm_id << 24);
1501 
1502 	amdgpu_ring_write(ring, header);
1503 	amdgpu_ring_write(ring,
1504 #ifdef __BIG_ENDIAN
1505 			  (2 << 0) |
1506 #endif
1507 			  (ib->gpu_addr & 0xFFFFFFFC));
1508 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1509 	amdgpu_ring_write(ring, control);
1510 }
1511 
1512 /**
1513  * gfx_v6_0_ring_test_ib - basic ring IB test
1514  *
1515  * @ring: amdgpu_ring structure holding ring information
1516  *
1517  * Allocate an IB and execute it on the gfx ring (SI).
1518  * Provides a basic gfx ring test to verify that IBs are working.
1519  * Returns 0 on success, error on failure.
1520  */
1521 static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1522 {
1523 	struct amdgpu_device *adev = ring->adev;
1524 	struct amdgpu_ib ib;
1525 	struct dma_fence *f = NULL;
1526 	uint32_t scratch;
1527 	uint32_t tmp = 0;
1528 	long r;
1529 
1530 	r = amdgpu_gfx_scratch_get(adev, &scratch);
1531 	if (r) {
1532 		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
1533 		return r;
1534 	}
1535 	WREG32(scratch, 0xCAFEDEAD);
1536 	memset(&ib, 0, sizeof(ib));
1537 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
1538 	if (r) {
1539 		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
1540 		goto err1;
1541 	}
1542 	ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
1543 	ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START));
1544 	ib.ptr[2] = 0xDEADBEEF;
1545 	ib.length_dw = 3;
1546 
1547 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1548 	if (r)
1549 		goto err2;
1550 
1551 	r = dma_fence_wait_timeout(f, false, timeout);
1552 	if (r == 0) {
1553 		DRM_ERROR("amdgpu: IB test timed out\n");
1554 		r = -ETIMEDOUT;
1555 		goto err2;
1556 	} else if (r < 0) {
1557 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
1558 		goto err2;
1559 	}
1560 	tmp = RREG32(scratch);
1561 	if (tmp == 0xDEADBEEF) {
1562 		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
1563 		r = 0;
1564 	} else {
1565 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
1566 			  scratch, tmp);
1567 		r = -EINVAL;
1568 	}
1569 
1570 err2:
1571 	amdgpu_ib_free(adev, &ib, NULL);
1572 	dma_fence_put(f);
1573 err1:
1574 	amdgpu_gfx_scratch_free(adev, scratch);
1575 	return r;
1576 }
1577 
1578 static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
1579 {
1580 	int i;
1581 	if (enable)
1582 		WREG32(CP_ME_CNTL, 0);
1583 	else {
1584 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1585 		WREG32(SCRATCH_UMSK, 0);
1586 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1587 			adev->gfx.gfx_ring[i].ready = false;
1588 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
1589 			adev->gfx.compute_ring[i].ready = false;
1590 	}
1591 	udelay(50);
1592 }
1593 
1594 static int gfx_v6_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
1595 {
1596 	unsigned i;
1597 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
1598 	const struct gfx_firmware_header_v1_0 *ce_hdr;
1599 	const struct gfx_firmware_header_v1_0 *me_hdr;
1600 	const __le32 *fw_data;
1601 	u32 fw_size;
1602 
1603 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
1604 		return -EINVAL;
1605 
1606 	gfx_v6_0_cp_gfx_enable(adev, false);
1607 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1608 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1609 	me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1610 
1611 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
1612 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
1613 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
1614 
1615 	/* PFP */
1616 	fw_data = (const __le32 *)
1617 		(adev->gfx.pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
1618 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
1619 	WREG32(CP_PFP_UCODE_ADDR, 0);
1620 	for (i = 0; i < fw_size; i++)
1621 		WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
1622 	WREG32(CP_PFP_UCODE_ADDR, 0);
1623 
1624 	/* CE */
1625 	fw_data = (const __le32 *)
1626 		(adev->gfx.ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
1627 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
1628 	WREG32(CP_CE_UCODE_ADDR, 0);
1629 	for (i = 0; i < fw_size; i++)
1630 		WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
1631 	WREG32(CP_CE_UCODE_ADDR, 0);
1632 
1633 	/* ME */
1634 	fw_data = (const __be32 *)
1635 		(adev->gfx.me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
1636 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
1637 	WREG32(CP_ME_RAM_WADDR, 0);
1638 	for (i = 0; i < fw_size; i++)
1639 		WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
1640 	WREG32(CP_ME_RAM_WADDR, 0);
1641 
1642 
1643 	WREG32(CP_PFP_UCODE_ADDR, 0);
1644 	WREG32(CP_CE_UCODE_ADDR, 0);
1645 	WREG32(CP_ME_RAM_WADDR, 0);
1646 	WREG32(CP_ME_RAM_RADDR, 0);
1647 	return 0;
1648 }
1649 
1650 static int gfx_v6_0_cp_gfx_start(struct amdgpu_device *adev)
1651 {
1652 	const struct cs_section_def *sect = NULL;
1653 	const struct cs_extent_def *ext = NULL;
1654 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1655 	int r, i;
1656 
1657 	r = amdgpu_ring_alloc(ring, 7 + 4);
1658 	if (r) {
1659 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
1660 		return r;
1661 	}
1662 	amdgpu_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
1663 	amdgpu_ring_write(ring, 0x1);
1664 	amdgpu_ring_write(ring, 0x0);
1665 	amdgpu_ring_write(ring, adev->gfx.config.max_hw_contexts - 1);
1666 	amdgpu_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
1667 	amdgpu_ring_write(ring, 0);
1668 	amdgpu_ring_write(ring, 0);
1669 
1670 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1671 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1672 	amdgpu_ring_write(ring, 0xc000);
1673 	amdgpu_ring_write(ring, 0xe000);
1674 	amdgpu_ring_commit(ring);
1675 
1676 	gfx_v6_0_cp_gfx_enable(adev, true);
1677 
1678 	r = amdgpu_ring_alloc(ring, gfx_v6_0_get_csb_size(adev) + 10);
1679 	if (r) {
1680 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
1681 		return r;
1682 	}
1683 
1684 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1685 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1686 
1687 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1688 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1689 			if (sect->id == SECT_CONTEXT) {
1690 				amdgpu_ring_write(ring,
1691 						  PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1692 				amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
1693 				for (i = 0; i < ext->reg_count; i++)
1694 					amdgpu_ring_write(ring, ext->extent[i]);
1695 			}
1696 		}
1697 	}
1698 
1699 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1700 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1701 
1702 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1703 	amdgpu_ring_write(ring, 0);
1704 
1705 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1706 	amdgpu_ring_write(ring, 0x00000316);
1707 	amdgpu_ring_write(ring, 0x0000000e);
1708 	amdgpu_ring_write(ring, 0x00000010);
1709 
1710 	amdgpu_ring_commit(ring);
1711 
1712 	return 0;
1713 }
1714 
1715 static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
1716 {
1717 	struct amdgpu_ring *ring;
1718 	u32 tmp;
1719 	u32 rb_bufsz;
1720 	int r;
1721 	u64 rptr_addr;
1722 
1723 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
1724 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1725 
1726 	/* Set the write pointer delay */
1727 	WREG32(CP_RB_WPTR_DELAY, 0);
1728 
1729 	WREG32(CP_DEBUG, 0);
1730 	WREG32(SCRATCH_ADDR, 0);
1731 
1732 	/* ring 0 - compute and gfx */
1733 	/* Set ring buffer size */
1734 	ring = &adev->gfx.gfx_ring[0];
1735 	rb_bufsz = order_base_2(ring->ring_size / 8);
1736 	tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1737 
1738 #ifdef __BIG_ENDIAN
1739 	tmp |= BUF_SWAP_32BIT;
1740 #endif
1741 	WREG32(CP_RB0_CNTL, tmp);
1742 
1743 	/* Initialize the ring buffer's read and write pointers */
1744 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
1745 	ring->wptr = 0;
1746 	WREG32(CP_RB0_WPTR, ring->wptr);
1747 
1748 	/* set the wb address whether it's enabled or not */
1749 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
1750 	WREG32(CP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
1751 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
1752 
1753 	WREG32(SCRATCH_UMSK, 0);
1754 
1755 	mdelay(1);
1756 	WREG32(CP_RB0_CNTL, tmp);
1757 
1758 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
1759 
1760 	/* start the rings */
1761 	gfx_v6_0_cp_gfx_start(adev);
1762 	ring->ready = true;
1763 	r = amdgpu_ring_test_ring(ring);
1764 	if (r) {
1765 		ring->ready = false;
1766 		return r;
1767 	}
1768 
1769 	return 0;
1770 }
1771 
1772 static u32 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
1773 {
1774 	return ring->adev->wb.wb[ring->rptr_offs];
1775 }
1776 
1777 static u32 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
1778 {
1779 	struct amdgpu_device *adev = ring->adev;
1780 
1781 	if (ring == &adev->gfx.gfx_ring[0])
1782 		return RREG32(CP_RB0_WPTR);
1783 	else if (ring == &adev->gfx.compute_ring[0])
1784 		return RREG32(CP_RB1_WPTR);
1785 	else if (ring == &adev->gfx.compute_ring[1])
1786 		return RREG32(CP_RB2_WPTR);
1787 	else
1788 		BUG();
1789 }
1790 
1791 static void gfx_v6_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
1792 {
1793 	struct amdgpu_device *adev = ring->adev;
1794 
1795 	WREG32(CP_RB0_WPTR, ring->wptr);
1796 	(void)RREG32(CP_RB0_WPTR);
1797 }
1798 
1799 static void gfx_v6_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
1800 {
1801 	struct amdgpu_device *adev = ring->adev;
1802 
1803 	if (ring == &adev->gfx.compute_ring[0]) {
1804 		WREG32(CP_RB1_WPTR, ring->wptr);
1805 		(void)RREG32(CP_RB1_WPTR);
1806 	} else if (ring == &adev->gfx.compute_ring[1]) {
1807 		WREG32(CP_RB2_WPTR, ring->wptr);
1808 		(void)RREG32(CP_RB2_WPTR);
1809 	} else {
1810 		BUG();
1811 	}
1812 
1813 }
1814 
1815 static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
1816 {
1817 	struct amdgpu_ring *ring;
1818 	u32 tmp;
1819 	u32 rb_bufsz;
1820 	int r;
1821 	u64 rptr_addr;
1822 
1823 	/* ring1  - compute only */
1824 	/* Set ring buffer size */
1825 
1826 	ring = &adev->gfx.compute_ring[0];
1827 	rb_bufsz = order_base_2(ring->ring_size / 8);
1828 	tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1829 #ifdef __BIG_ENDIAN
1830 	tmp |= BUF_SWAP_32BIT;
1831 #endif
1832 	WREG32(CP_RB1_CNTL, tmp);
1833 
1834 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
1835 	ring->wptr = 0;
1836 	WREG32(CP_RB1_WPTR, ring->wptr);
1837 
1838 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
1839 	WREG32(CP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
1840 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
1841 
1842 	mdelay(1);
1843 	WREG32(CP_RB1_CNTL, tmp);
1844 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
1845 
1846 	ring = &adev->gfx.compute_ring[1];
1847 	rb_bufsz = order_base_2(ring->ring_size / 8);
1848 	tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1849 #ifdef __BIG_ENDIAN
1850 	tmp |= BUF_SWAP_32BIT;
1851 #endif
1852 	WREG32(CP_RB2_CNTL, tmp);
1853 
1854 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
1855 	ring->wptr = 0;
1856 	WREG32(CP_RB2_WPTR, ring->wptr);
1857 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
1858 	WREG32(CP_RB2_RPTR_ADDR, lower_32_bits(rptr_addr));
1859 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
1860 
1861 	mdelay(1);
1862 	WREG32(CP_RB2_CNTL, tmp);
1863 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
1864 
1865 	adev->gfx.compute_ring[0].ready = true;
1866 	adev->gfx.compute_ring[1].ready = true;
1867 
1868 	r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[0]);
1869 	if (r) {
1870 		adev->gfx.compute_ring[0].ready = false;
1871 		return r;
1872 	}
1873 
1874 	r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[1]);
1875 	if (r) {
1876 		adev->gfx.compute_ring[1].ready = false;
1877 		return r;
1878 	}
1879 
1880 	return 0;
1881 }
1882 
1883 static void gfx_v6_0_cp_enable(struct amdgpu_device *adev, bool enable)
1884 {
1885 	gfx_v6_0_cp_gfx_enable(adev, enable);
1886 }
1887 
1888 static int gfx_v6_0_cp_load_microcode(struct amdgpu_device *adev)
1889 {
1890 	return gfx_v6_0_cp_gfx_load_microcode(adev);
1891 }
1892 
1893 static void gfx_v6_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1894 					       bool enable)
1895 {
1896 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
1897 	u32 mask;
1898 	int i;
1899 
1900 	if (enable)
1901 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
1902 	else
1903 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
1904 	WREG32(CP_INT_CNTL_RING0, tmp);
1905 
1906 	if (!enable) {
1907 		/* read a gfx register */
1908 		tmp = RREG32(DB_DEPTH_INFO);
1909 
1910 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
1911 		for (i = 0; i < adev->usec_timeout; i++) {
1912 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
1913 				break;
1914 			udelay(1);
1915 		}
1916 	}
1917 }
1918 
1919 static int gfx_v6_0_cp_resume(struct amdgpu_device *adev)
1920 {
1921 	int r;
1922 
1923 	gfx_v6_0_enable_gui_idle_interrupt(adev, false);
1924 
1925 	r = gfx_v6_0_cp_load_microcode(adev);
1926 	if (r)
1927 		return r;
1928 
1929 	r = gfx_v6_0_cp_gfx_resume(adev);
1930 	if (r)
1931 		return r;
1932 	r = gfx_v6_0_cp_compute_resume(adev);
1933 	if (r)
1934 		return r;
1935 
1936 	gfx_v6_0_enable_gui_idle_interrupt(adev, true);
1937 
1938 	return 0;
1939 }
1940 
1941 static void gfx_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1942 {
1943 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
1944 	uint32_t seq = ring->fence_drv.sync_seq;
1945 	uint64_t addr = ring->fence_drv.gpu_addr;
1946 
1947 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1948 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
1949 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
1950 				 WAIT_REG_MEM_ENGINE(usepfp)));   /* pfp or me */
1951 	amdgpu_ring_write(ring, addr & 0xfffffffc);
1952 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1953 	amdgpu_ring_write(ring, seq);
1954 	amdgpu_ring_write(ring, 0xffffffff);
1955 	amdgpu_ring_write(ring, 4); /* poll interval */
1956 
1957 	if (usepfp) {
1958 		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
1959 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1960 		amdgpu_ring_write(ring, 0);
1961 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1962 		amdgpu_ring_write(ring, 0);
1963 	}
1964 }
1965 
1966 static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1967 					unsigned vm_id, uint64_t pd_addr)
1968 {
1969 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
1970 
1971 	/* write new base address */
1972 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1973 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
1974 				 WRITE_DATA_DST_SEL(0)));
1975 	if (vm_id < 8) {
1976 		amdgpu_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id ));
1977 	} else {
1978 		amdgpu_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8)));
1979 	}
1980 	amdgpu_ring_write(ring, 0);
1981 	amdgpu_ring_write(ring, pd_addr >> 12);
1982 
1983 	/* bits 0-15 are the VM contexts0-15 */
1984 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1985 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
1986 				 WRITE_DATA_DST_SEL(0)));
1987 	amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
1988 	amdgpu_ring_write(ring, 0);
1989 	amdgpu_ring_write(ring, 1 << vm_id);
1990 
1991 	/* wait for the invalidate to complete */
1992 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1993 	amdgpu_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
1994 				 WAIT_REG_MEM_ENGINE(0))); /* me */
1995 	amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
1996 	amdgpu_ring_write(ring, 0);
1997 	amdgpu_ring_write(ring, 0); /* ref */
1998 	amdgpu_ring_write(ring, 0); /* mask */
1999 	amdgpu_ring_write(ring, 0x20); /* poll interval */
2000 
2001 	if (usepfp) {
2002 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
2003 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2004 		amdgpu_ring_write(ring, 0x0);
2005 
2006 		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
2007 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2008 		amdgpu_ring_write(ring, 0);
2009 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2010 		amdgpu_ring_write(ring, 0);
2011 	}
2012 }
2013 
2014 
2015 static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
2016 {
2017 	int r;
2018 
2019 	if (adev->gfx.rlc.save_restore_obj) {
2020 		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false);
2021 		if (unlikely(r != 0))
2022 			dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r);
2023 		amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj);
2024 		amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
2025 
2026 		amdgpu_bo_unref(&adev->gfx.rlc.save_restore_obj);
2027 		adev->gfx.rlc.save_restore_obj = NULL;
2028 	}
2029 
2030 	if (adev->gfx.rlc.clear_state_obj) {
2031 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
2032 		if (unlikely(r != 0))
2033 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
2034 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
2035 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
2036 
2037 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2038 		adev->gfx.rlc.clear_state_obj = NULL;
2039 	}
2040 
2041 	if (adev->gfx.rlc.cp_table_obj) {
2042 		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
2043 		if (unlikely(r != 0))
2044 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
2045 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
2046 		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
2047 
2048 		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
2049 		adev->gfx.rlc.cp_table_obj = NULL;
2050 	}
2051 }
2052 
2053 static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
2054 {
2055 	const u32 *src_ptr;
2056 	volatile u32 *dst_ptr;
2057 	u32 dws, i;
2058 	u64 reg_list_mc_addr;
2059 	const struct cs_section_def *cs_data;
2060 	int r;
2061 
2062 	adev->gfx.rlc.reg_list = verde_rlc_save_restore_register_list;
2063 	adev->gfx.rlc.reg_list_size =
2064 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
2065 
2066 	adev->gfx.rlc.cs_data = si_cs_data;
2067 	src_ptr = adev->gfx.rlc.reg_list;
2068 	dws = adev->gfx.rlc.reg_list_size;
2069 	cs_data = adev->gfx.rlc.cs_data;
2070 
2071 	if (src_ptr) {
2072 		/* save restore block */
2073 		if (adev->gfx.rlc.save_restore_obj == NULL) {
2074 
2075 			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
2076 					     AMDGPU_GEM_DOMAIN_VRAM,
2077 					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
2078 					     NULL, NULL,
2079 					     &adev->gfx.rlc.save_restore_obj);
2080 
2081 			if (r) {
2082 				dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
2083 				return r;
2084 			}
2085 		}
2086 
2087 		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false);
2088 		if (unlikely(r != 0)) {
2089 			gfx_v6_0_rlc_fini(adev);
2090 			return r;
2091 		}
2092 		r = amdgpu_bo_pin(adev->gfx.rlc.save_restore_obj, AMDGPU_GEM_DOMAIN_VRAM,
2093 				  &adev->gfx.rlc.save_restore_gpu_addr);
2094 		if (r) {
2095 			amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
2096 			dev_warn(adev->dev, "(%d) pin RLC sr bo failed\n", r);
2097 			gfx_v6_0_rlc_fini(adev);
2098 			return r;
2099 		}
2100 
2101 		r = amdgpu_bo_kmap(adev->gfx.rlc.save_restore_obj, (void **)&adev->gfx.rlc.sr_ptr);
2102 		if (r) {
2103 			dev_warn(adev->dev, "(%d) map RLC sr bo failed\n", r);
2104 			gfx_v6_0_rlc_fini(adev);
2105 			return r;
2106 		}
2107 		/* write the sr buffer */
2108 		dst_ptr = adev->gfx.rlc.sr_ptr;
2109 		for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
2110 			dst_ptr[i] = cpu_to_le32(src_ptr[i]);
2111 		amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
2112 		amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
2113 	}
2114 
2115 	if (cs_data) {
2116 		/* clear state block */
2117 		adev->gfx.rlc.clear_state_size = gfx_v6_0_get_csb_size(adev);
2118 		dws = adev->gfx.rlc.clear_state_size + (256 / 4);
2119 
2120 		if (adev->gfx.rlc.clear_state_obj == NULL) {
2121 			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
2122 					     AMDGPU_GEM_DOMAIN_VRAM,
2123 					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
2124 					     NULL, NULL,
2125 					     &adev->gfx.rlc.clear_state_obj);
2126 
2127 			if (r) {
2128 				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
2129 				gfx_v6_0_rlc_fini(adev);
2130 				return r;
2131 			}
2132 		}
2133 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
2134 		if (unlikely(r != 0)) {
2135 			gfx_v6_0_rlc_fini(adev);
2136 			return r;
2137 		}
2138 		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
2139 				  &adev->gfx.rlc.clear_state_gpu_addr);
2140 		if (r) {
2141 			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
2142 			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
2143 			gfx_v6_0_rlc_fini(adev);
2144 			return r;
2145 		}
2146 
2147 		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
2148 		if (r) {
2149 			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
2150 			gfx_v6_0_rlc_fini(adev);
2151 			return r;
2152 		}
2153 		/* set up the cs buffer */
2154 		dst_ptr = adev->gfx.rlc.cs_ptr;
2155 		reg_list_mc_addr = adev->gfx.rlc.clear_state_gpu_addr + 256;
2156 		dst_ptr[0] = cpu_to_le32(upper_32_bits(reg_list_mc_addr));
2157 		dst_ptr[1] = cpu_to_le32(lower_32_bits(reg_list_mc_addr));
2158 		dst_ptr[2] = cpu_to_le32(adev->gfx.rlc.clear_state_size);
2159 		gfx_v6_0_get_csb_buffer(adev, &dst_ptr[(256/4)]);
2160 		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
2161 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
2162 	}
2163 
2164 	return 0;
2165 }
2166 
2167 static void gfx_v6_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
2168 {
2169 	u32 tmp;
2170 
2171 	tmp = RREG32(RLC_LB_CNTL);
2172 	if (enable)
2173 		tmp |= LOAD_BALANCE_ENABLE;
2174 	else
2175 		tmp &= ~LOAD_BALANCE_ENABLE;
2176 	WREG32(RLC_LB_CNTL, tmp);
2177 
2178 	if (!enable) {
2179 		gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2180 		WREG32(SPI_LB_CU_MASK, 0x00ff);
2181 	}
2182 
2183 }
2184 
2185 static void gfx_v6_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2186 {
2187 	int i;
2188 
2189 	for (i = 0; i < adev->usec_timeout; i++) {
2190 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
2191 			break;
2192 		udelay(1);
2193 	}
2194 
2195 	for (i = 0; i < adev->usec_timeout; i++) {
2196 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
2197 			break;
2198 		udelay(1);
2199 	}
2200 }
2201 
2202 static void gfx_v6_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
2203 {
2204 	u32 tmp;
2205 
2206 	tmp = RREG32(RLC_CNTL);
2207 	if (tmp != rlc)
2208 		WREG32(RLC_CNTL, rlc);
2209 }
2210 
2211 static u32 gfx_v6_0_halt_rlc(struct amdgpu_device *adev)
2212 {
2213 	u32 data, orig;
2214 
2215 	orig = data = RREG32(RLC_CNTL);
2216 
2217 	if (data & RLC_ENABLE) {
2218 		data &= ~RLC_ENABLE;
2219 		WREG32(RLC_CNTL, data);
2220 
2221 		gfx_v6_0_wait_for_rlc_serdes(adev);
2222 	}
2223 
2224 	return orig;
2225 }
2226 
2227 static void gfx_v6_0_rlc_stop(struct amdgpu_device *adev)
2228 {
2229 	WREG32(RLC_CNTL, 0);
2230 
2231 	gfx_v6_0_enable_gui_idle_interrupt(adev, false);
2232 	gfx_v6_0_wait_for_rlc_serdes(adev);
2233 }
2234 
2235 static void gfx_v6_0_rlc_start(struct amdgpu_device *adev)
2236 {
2237 	WREG32(RLC_CNTL, RLC_ENABLE);
2238 
2239 	gfx_v6_0_enable_gui_idle_interrupt(adev, true);
2240 
2241 	udelay(50);
2242 }
2243 
2244 static void gfx_v6_0_rlc_reset(struct amdgpu_device *adev)
2245 {
2246 	u32 tmp = RREG32(GRBM_SOFT_RESET);
2247 
2248 	tmp |= SOFT_RESET_RLC;
2249 	WREG32(GRBM_SOFT_RESET, tmp);
2250 	udelay(50);
2251 	tmp &= ~SOFT_RESET_RLC;
2252 	WREG32(GRBM_SOFT_RESET, tmp);
2253 	udelay(50);
2254 }
2255 
2256 static bool gfx_v6_0_lbpw_supported(struct amdgpu_device *adev)
2257 {
2258 	u32 tmp;
2259 
2260 	/* Enable LBPW only for DDR3 */
2261 	tmp = RREG32(MC_SEQ_MISC0);
2262 	if ((tmp & 0xF0000000) == 0xB0000000)
2263 		return true;
2264 	return false;
2265 }
2266 static void gfx_v6_0_init_cg(struct amdgpu_device *adev)
2267 {
2268 }
2269 
2270 static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
2271 {
2272 	u32 i;
2273 	const struct rlc_firmware_header_v1_0 *hdr;
2274 	const __le32 *fw_data;
2275 	u32 fw_size;
2276 
2277 
2278 	if (!adev->gfx.rlc_fw)
2279 		return -EINVAL;
2280 
2281 	gfx_v6_0_rlc_stop(adev);
2282 	gfx_v6_0_rlc_reset(adev);
2283 	gfx_v6_0_init_pg(adev);
2284 	gfx_v6_0_init_cg(adev);
2285 
2286 	WREG32(RLC_RL_BASE, 0);
2287 	WREG32(RLC_RL_SIZE, 0);
2288 	WREG32(RLC_LB_CNTL, 0);
2289 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
2290 	WREG32(RLC_LB_CNTR_INIT, 0);
2291 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
2292 
2293 	WREG32(RLC_MC_CNTL, 0);
2294 	WREG32(RLC_UCODE_CNTL, 0);
2295 
2296 	hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
2297 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2298 	fw_data = (const __le32 *)
2299 		(adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2300 
2301 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2302 
2303 	for (i = 0; i < fw_size; i++) {
2304 		WREG32(RLC_UCODE_ADDR, i);
2305 		WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
2306 	}
2307 	WREG32(RLC_UCODE_ADDR, 0);
2308 
2309 	gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev));
2310 	gfx_v6_0_rlc_start(adev);
2311 
2312 	return 0;
2313 }
2314 
2315 static void gfx_v6_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
2316 {
2317 	u32 data, orig, tmp;
2318 
2319 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
2320 
2321 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
2322 		gfx_v6_0_enable_gui_idle_interrupt(adev, true);
2323 
2324 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
2325 
2326 		tmp = gfx_v6_0_halt_rlc(adev);
2327 
2328 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
2329 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
2330 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
2331 
2332 		gfx_v6_0_wait_for_rlc_serdes(adev);
2333 		gfx_v6_0_update_rlc(adev, tmp);
2334 
2335 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
2336 
2337 		data |= CGCG_EN | CGLS_EN;
2338 	} else {
2339 		gfx_v6_0_enable_gui_idle_interrupt(adev, false);
2340 
2341 		RREG32(CB_CGTT_SCLK_CTRL);
2342 		RREG32(CB_CGTT_SCLK_CTRL);
2343 		RREG32(CB_CGTT_SCLK_CTRL);
2344 		RREG32(CB_CGTT_SCLK_CTRL);
2345 
2346 		data &= ~(CGCG_EN | CGLS_EN);
2347 	}
2348 
2349 	if (orig != data)
2350 		WREG32(RLC_CGCG_CGLS_CTRL, data);
2351 
2352 }
2353 
2354 static void gfx_v6_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
2355 {
2356 
2357 	u32 data, orig, tmp = 0;
2358 
2359 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
2360 		orig = data = RREG32(CGTS_SM_CTRL_REG);
2361 		data = 0x96940200;
2362 		if (orig != data)
2363 			WREG32(CGTS_SM_CTRL_REG, data);
2364 
2365 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
2366 			orig = data = RREG32(CP_MEM_SLP_CNTL);
2367 			data |= CP_MEM_LS_EN;
2368 			if (orig != data)
2369 				WREG32(CP_MEM_SLP_CNTL, data);
2370 		}
2371 
2372 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
2373 		data &= 0xffffffc0;
2374 		if (orig != data)
2375 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
2376 
2377 		tmp = gfx_v6_0_halt_rlc(adev);
2378 
2379 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
2380 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
2381 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
2382 
2383 		gfx_v6_0_update_rlc(adev, tmp);
2384 	} else {
2385 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
2386 		data |= 0x00000003;
2387 		if (orig != data)
2388 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
2389 
2390 		data = RREG32(CP_MEM_SLP_CNTL);
2391 		if (data & CP_MEM_LS_EN) {
2392 			data &= ~CP_MEM_LS_EN;
2393 			WREG32(CP_MEM_SLP_CNTL, data);
2394 		}
2395 		orig = data = RREG32(CGTS_SM_CTRL_REG);
2396 		data |= LS_OVERRIDE | OVERRIDE;
2397 		if (orig != data)
2398 			WREG32(CGTS_SM_CTRL_REG, data);
2399 
2400 		tmp = gfx_v6_0_halt_rlc(adev);
2401 
2402 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
2403 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
2404 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
2405 
2406 		gfx_v6_0_update_rlc(adev, tmp);
2407 	}
2408 }
2409 /*
2410 static void gfx_v6_0_update_cg(struct amdgpu_device *adev,
2411 			       bool enable)
2412 {
2413 	gfx_v6_0_enable_gui_idle_interrupt(adev, false);
2414 	if (enable) {
2415 		gfx_v6_0_enable_mgcg(adev, true);
2416 		gfx_v6_0_enable_cgcg(adev, true);
2417 	} else {
2418 		gfx_v6_0_enable_cgcg(adev, false);
2419 		gfx_v6_0_enable_mgcg(adev, false);
2420 	}
2421 	gfx_v6_0_enable_gui_idle_interrupt(adev, true);
2422 }
2423 */
2424 static void gfx_v6_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
2425 						bool enable)
2426 {
2427 }
2428 
2429 static void gfx_v6_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
2430 						bool enable)
2431 {
2432 }
2433 
2434 static void gfx_v6_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
2435 {
2436 	u32 data, orig;
2437 
2438 	orig = data = RREG32(RLC_PG_CNTL);
2439 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
2440 		data &= ~0x8000;
2441 	else
2442 		data |= 0x8000;
2443 	if (orig != data)
2444 		WREG32(RLC_PG_CNTL, data);
2445 }
2446 
2447 static void gfx_v6_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
2448 {
2449 }
2450 /*
2451 static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev)
2452 {
2453 	const __le32 *fw_data;
2454 	volatile u32 *dst_ptr;
2455 	int me, i, max_me = 4;
2456 	u32 bo_offset = 0;
2457 	u32 table_offset, table_size;
2458 
2459 	if (adev->asic_type == CHIP_KAVERI)
2460 		max_me = 5;
2461 
2462 	if (adev->gfx.rlc.cp_table_ptr == NULL)
2463 		return;
2464 
2465 	dst_ptr = adev->gfx.rlc.cp_table_ptr;
2466 	for (me = 0; me < max_me; me++) {
2467 		if (me == 0) {
2468 			const struct gfx_firmware_header_v1_0 *hdr =
2469 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
2470 			fw_data = (const __le32 *)
2471 				(adev->gfx.ce_fw->data +
2472 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2473 			table_offset = le32_to_cpu(hdr->jt_offset);
2474 			table_size = le32_to_cpu(hdr->jt_size);
2475 		} else if (me == 1) {
2476 			const struct gfx_firmware_header_v1_0 *hdr =
2477 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
2478 			fw_data = (const __le32 *)
2479 				(adev->gfx.pfp_fw->data +
2480 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2481 			table_offset = le32_to_cpu(hdr->jt_offset);
2482 			table_size = le32_to_cpu(hdr->jt_size);
2483 		} else if (me == 2) {
2484 			const struct gfx_firmware_header_v1_0 *hdr =
2485 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
2486 			fw_data = (const __le32 *)
2487 				(adev->gfx.me_fw->data +
2488 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2489 			table_offset = le32_to_cpu(hdr->jt_offset);
2490 			table_size = le32_to_cpu(hdr->jt_size);
2491 		} else if (me == 3) {
2492 			const struct gfx_firmware_header_v1_0 *hdr =
2493 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2494 			fw_data = (const __le32 *)
2495 				(adev->gfx.mec_fw->data +
2496 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2497 			table_offset = le32_to_cpu(hdr->jt_offset);
2498 			table_size = le32_to_cpu(hdr->jt_size);
2499 		} else {
2500 			const struct gfx_firmware_header_v1_0 *hdr =
2501 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
2502 			fw_data = (const __le32 *)
2503 				(adev->gfx.mec2_fw->data +
2504 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2505 			table_offset = le32_to_cpu(hdr->jt_offset);
2506 			table_size = le32_to_cpu(hdr->jt_size);
2507 		}
2508 
2509 		for (i = 0; i < table_size; i ++) {
2510 			dst_ptr[bo_offset + i] =
2511 				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
2512 		}
2513 
2514 		bo_offset += table_size;
2515 	}
2516 }
2517 */
2518 static void gfx_v6_0_enable_gfx_cgpg(struct amdgpu_device *adev,
2519 				     bool enable)
2520 {
2521 
2522 	u32 tmp;
2523 
2524 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
2525 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
2526 		WREG32(RLC_TTOP_D, tmp);
2527 
2528 		tmp = RREG32(RLC_PG_CNTL);
2529 		tmp |= GFX_PG_ENABLE;
2530 		WREG32(RLC_PG_CNTL, tmp);
2531 
2532 		tmp = RREG32(RLC_AUTO_PG_CTRL);
2533 		tmp |= AUTO_PG_EN;
2534 		WREG32(RLC_AUTO_PG_CTRL, tmp);
2535 	} else {
2536 		tmp = RREG32(RLC_AUTO_PG_CTRL);
2537 		tmp &= ~AUTO_PG_EN;
2538 		WREG32(RLC_AUTO_PG_CTRL, tmp);
2539 
2540 		tmp = RREG32(DB_RENDER_CONTROL);
2541 	}
2542 }
2543 
2544 static u32 gfx_v6_0_get_cu_active_bitmap(struct amdgpu_device *adev,
2545 					 u32 se, u32 sh)
2546 {
2547 
2548 	u32 mask = 0, tmp, tmp1;
2549 	int i;
2550 
2551 	mutex_lock(&adev->grbm_idx_mutex);
2552 	gfx_v6_0_select_se_sh(adev, se, sh, 0xffffffff);
2553 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2554 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2555 	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2556 	mutex_unlock(&adev->grbm_idx_mutex);
2557 
2558 	tmp &= 0xffff0000;
2559 
2560 	tmp |= tmp1;
2561 	tmp >>= 16;
2562 
2563 	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
2564 		mask <<= 1;
2565 		mask |= 1;
2566 	}
2567 
2568 	return (~tmp) & mask;
2569 }
2570 
2571 static void gfx_v6_0_init_ao_cu_mask(struct amdgpu_device *adev)
2572 {
2573 	u32 i, j, k, active_cu_number = 0;
2574 
2575 	u32 mask, counter, cu_bitmap;
2576 	u32 tmp = 0;
2577 
2578 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2579 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2580 			mask = 1;
2581 			cu_bitmap = 0;
2582 			counter  = 0;
2583 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
2584 				if (gfx_v6_0_get_cu_active_bitmap(adev, i, j) & mask) {
2585 					if (counter < 2)
2586 						cu_bitmap |= mask;
2587 					counter++;
2588 				}
2589 				mask <<= 1;
2590 			}
2591 
2592 			active_cu_number += counter;
2593 			tmp |= (cu_bitmap << (i * 16 + j * 8));
2594 		}
2595 	}
2596 
2597 	WREG32(RLC_PG_AO_CU_MASK, tmp);
2598 
2599 	tmp = RREG32(RLC_MAX_PG_CU);
2600 	tmp &= ~MAX_PU_CU_MASK;
2601 	tmp |= MAX_PU_CU(active_cu_number);
2602 	WREG32(RLC_MAX_PG_CU, tmp);
2603 }
2604 
2605 static void gfx_v6_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
2606 					    bool enable)
2607 {
2608 	u32 data, orig;
2609 
2610 	orig = data = RREG32(RLC_PG_CNTL);
2611 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
2612 		data |= STATIC_PER_CU_PG_ENABLE;
2613 	else
2614 		data &= ~STATIC_PER_CU_PG_ENABLE;
2615 	if (orig != data)
2616 		WREG32(RLC_PG_CNTL, data);
2617 }
2618 
2619 static void gfx_v6_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
2620 					     bool enable)
2621 {
2622 	u32 data, orig;
2623 
2624 	orig = data = RREG32(RLC_PG_CNTL);
2625 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
2626 		data |= DYN_PER_CU_PG_ENABLE;
2627 	else
2628 		data &= ~DYN_PER_CU_PG_ENABLE;
2629 	if (orig != data)
2630 		WREG32(RLC_PG_CNTL, data);
2631 }
2632 
2633 static void gfx_v6_0_init_gfx_cgpg(struct amdgpu_device *adev)
2634 {
2635 	u32 tmp;
2636 
2637 	WREG32(RLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
2638 
2639 	tmp = RREG32(RLC_PG_CNTL);
2640 	tmp |= GFX_PG_SRC;
2641 	WREG32(RLC_PG_CNTL, tmp);
2642 
2643 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8);
2644 
2645 	tmp = RREG32(RLC_AUTO_PG_CTRL);
2646 
2647 	tmp &= ~GRBM_REG_SGIT_MASK;
2648 	tmp |= GRBM_REG_SGIT(0x700);
2649 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
2650 	WREG32(RLC_AUTO_PG_CTRL, tmp);
2651 }
2652 
2653 static void gfx_v6_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
2654 {
2655 	gfx_v6_0_enable_gfx_cgpg(adev, enable);
2656 	gfx_v6_0_enable_gfx_static_mgpg(adev, enable);
2657 	gfx_v6_0_enable_gfx_dynamic_mgpg(adev, enable);
2658 }
2659 
2660 static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev)
2661 {
2662 	u32 count = 0;
2663 	const struct cs_section_def *sect = NULL;
2664 	const struct cs_extent_def *ext = NULL;
2665 
2666 	if (adev->gfx.rlc.cs_data == NULL)
2667 		return 0;
2668 
2669 	/* begin clear state */
2670 	count += 2;
2671 	/* context control state */
2672 	count += 3;
2673 
2674 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
2675 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2676 			if (sect->id == SECT_CONTEXT)
2677 				count += 2 + ext->reg_count;
2678 			else
2679 				return 0;
2680 		}
2681 	}
2682 	/* pa_sc_raster_config */
2683 	count += 3;
2684 	/* end clear state */
2685 	count += 2;
2686 	/* clear state */
2687 	count += 2;
2688 
2689 	return count;
2690 }
2691 
2692 static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev,
2693 				    volatile u32 *buffer)
2694 {
2695 	u32 count = 0, i;
2696 	const struct cs_section_def *sect = NULL;
2697 	const struct cs_extent_def *ext = NULL;
2698 
2699 	if (adev->gfx.rlc.cs_data == NULL)
2700 		return;
2701 	if (buffer == NULL)
2702 		return;
2703 
2704 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2705 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2706 
2707 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2708 	buffer[count++] = cpu_to_le32(0x80000000);
2709 	buffer[count++] = cpu_to_le32(0x80000000);
2710 
2711 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
2712 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2713 			if (sect->id == SECT_CONTEXT) {
2714 				buffer[count++] =
2715 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
2716 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
2717 				for (i = 0; i < ext->reg_count; i++)
2718 					buffer[count++] = cpu_to_le32(ext->extent[i]);
2719 			} else {
2720 				return;
2721 			}
2722 		}
2723 	}
2724 
2725 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
2726 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
2727 
2728 	switch (adev->asic_type) {
2729 	case CHIP_TAHITI:
2730 	case CHIP_PITCAIRN:
2731 		buffer[count++] = cpu_to_le32(0x2a00126a);
2732 		break;
2733 	case CHIP_VERDE:
2734 		buffer[count++] = cpu_to_le32(0x0000124a);
2735 		break;
2736 	case CHIP_OLAND:
2737 		buffer[count++] = cpu_to_le32(0x00000082);
2738 		break;
2739 	case CHIP_HAINAN:
2740 		buffer[count++] = cpu_to_le32(0x00000000);
2741 		break;
2742 	default:
2743 		buffer[count++] = cpu_to_le32(0x00000000);
2744 		break;
2745 	}
2746 
2747 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2748 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
2749 
2750 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
2751 	buffer[count++] = cpu_to_le32(0);
2752 }
2753 
2754 static void gfx_v6_0_init_pg(struct amdgpu_device *adev)
2755 {
2756 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2757 			      AMD_PG_SUPPORT_GFX_SMG |
2758 			      AMD_PG_SUPPORT_GFX_DMG |
2759 			      AMD_PG_SUPPORT_CP |
2760 			      AMD_PG_SUPPORT_GDS |
2761 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2762 		gfx_v6_0_enable_sclk_slowdown_on_pu(adev, true);
2763 		gfx_v6_0_enable_sclk_slowdown_on_pd(adev, true);
2764 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
2765 			gfx_v6_0_init_gfx_cgpg(adev);
2766 			gfx_v6_0_enable_cp_pg(adev, true);
2767 			gfx_v6_0_enable_gds_pg(adev, true);
2768 		} else {
2769 			WREG32(RLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
2770 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8);
2771 
2772 		}
2773 		gfx_v6_0_init_ao_cu_mask(adev);
2774 		gfx_v6_0_update_gfx_pg(adev, true);
2775 	} else {
2776 
2777 		WREG32(RLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
2778 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8);
2779 	}
2780 }
2781 
2782 static void gfx_v6_0_fini_pg(struct amdgpu_device *adev)
2783 {
2784 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2785 			      AMD_PG_SUPPORT_GFX_SMG |
2786 			      AMD_PG_SUPPORT_GFX_DMG |
2787 			      AMD_PG_SUPPORT_CP |
2788 			      AMD_PG_SUPPORT_GDS |
2789 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2790 		gfx_v6_0_update_gfx_pg(adev, false);
2791 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
2792 			gfx_v6_0_enable_cp_pg(adev, false);
2793 			gfx_v6_0_enable_gds_pg(adev, false);
2794 		}
2795 	}
2796 }
2797 
2798 static uint64_t gfx_v6_0_get_gpu_clock_counter(struct amdgpu_device *adev)
2799 {
2800 	uint64_t clock;
2801 
2802 	mutex_lock(&adev->gfx.gpu_clock_mutex);
2803 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
2804 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
2805 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
2806 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
2807 	return clock;
2808 }
2809 
2810 static void gfx_v6_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
2811 {
2812 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2813 	amdgpu_ring_write(ring, 0x80000000);
2814 	amdgpu_ring_write(ring, 0);
2815 }
2816 
2817 static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = {
2818 	.get_gpu_clock_counter = &gfx_v6_0_get_gpu_clock_counter,
2819 	.select_se_sh = &gfx_v6_0_select_se_sh,
2820 };
2821 
2822 static int gfx_v6_0_early_init(void *handle)
2823 {
2824 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2825 
2826 	adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
2827 	adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS;
2828 	adev->gfx.funcs = &gfx_v6_0_gfx_funcs;
2829 	gfx_v6_0_set_ring_funcs(adev);
2830 	gfx_v6_0_set_irq_funcs(adev);
2831 
2832 	return 0;
2833 }
2834 
2835 static int gfx_v6_0_sw_init(void *handle)
2836 {
2837 	struct amdgpu_ring *ring;
2838 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2839 	int i, r;
2840 
2841 	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2842 	if (r)
2843 		return r;
2844 
2845 	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2846 	if (r)
2847 		return r;
2848 
2849 	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2850 	if (r)
2851 		return r;
2852 
2853 	gfx_v6_0_scratch_init(adev);
2854 
2855 	r = gfx_v6_0_init_microcode(adev);
2856 	if (r) {
2857 		DRM_ERROR("Failed to load gfx firmware!\n");
2858 		return r;
2859 	}
2860 
2861 	r = gfx_v6_0_rlc_init(adev);
2862 	if (r) {
2863 		DRM_ERROR("Failed to init rlc BOs!\n");
2864 		return r;
2865 	}
2866 
2867 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2868 		ring = &adev->gfx.gfx_ring[i];
2869 		ring->ring_obj = NULL;
2870 		sprintf(ring->name, "gfx");
2871 		r = amdgpu_ring_init(adev, ring, 1024,
2872 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
2873 		if (r)
2874 			return r;
2875 	}
2876 
2877 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2878 		unsigned irq_type;
2879 
2880 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2881 			DRM_ERROR("Too many (%d) compute rings!\n", i);
2882 			break;
2883 		}
2884 		ring = &adev->gfx.compute_ring[i];
2885 		ring->ring_obj = NULL;
2886 		ring->use_doorbell = false;
2887 		ring->doorbell_index = 0;
2888 		ring->me = 1;
2889 		ring->pipe = i;
2890 		ring->queue = i;
2891 		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
2892 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2893 		r = amdgpu_ring_init(adev, ring, 1024,
2894 				     &adev->gfx.eop_irq, irq_type);
2895 		if (r)
2896 			return r;
2897 	}
2898 
2899 	return r;
2900 }
2901 
2902 static int gfx_v6_0_sw_fini(void *handle)
2903 {
2904 	int i;
2905 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2906 
2907 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2908 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2909 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2910 
2911 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2912 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2913 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2914 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2915 
2916 	gfx_v6_0_rlc_fini(adev);
2917 
2918 	return 0;
2919 }
2920 
2921 static int gfx_v6_0_hw_init(void *handle)
2922 {
2923 	int r;
2924 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2925 
2926 	gfx_v6_0_gpu_init(adev);
2927 
2928 	r = gfx_v6_0_rlc_resume(adev);
2929 	if (r)
2930 		return r;
2931 
2932 	r = gfx_v6_0_cp_resume(adev);
2933 	if (r)
2934 		return r;
2935 
2936 	adev->gfx.ce_ram_size = 0x8000;
2937 
2938 	return r;
2939 }
2940 
2941 static int gfx_v6_0_hw_fini(void *handle)
2942 {
2943 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2944 
2945 	gfx_v6_0_cp_enable(adev, false);
2946 	gfx_v6_0_rlc_stop(adev);
2947 	gfx_v6_0_fini_pg(adev);
2948 
2949 	return 0;
2950 }
2951 
2952 static int gfx_v6_0_suspend(void *handle)
2953 {
2954 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2955 
2956 	return gfx_v6_0_hw_fini(adev);
2957 }
2958 
2959 static int gfx_v6_0_resume(void *handle)
2960 {
2961 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2962 
2963 	return gfx_v6_0_hw_init(adev);
2964 }
2965 
2966 static bool gfx_v6_0_is_idle(void *handle)
2967 {
2968 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2969 
2970 	if (RREG32(GRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
2971 		return false;
2972 	else
2973 		return true;
2974 }
2975 
2976 static int gfx_v6_0_wait_for_idle(void *handle)
2977 {
2978 	unsigned i;
2979 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2980 
2981 	for (i = 0; i < adev->usec_timeout; i++) {
2982 		if (gfx_v6_0_is_idle(handle))
2983 			return 0;
2984 		udelay(1);
2985 	}
2986 	return -ETIMEDOUT;
2987 }
2988 
2989 static int gfx_v6_0_soft_reset(void *handle)
2990 {
2991 	return 0;
2992 }
2993 
2994 static void gfx_v6_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
2995 						 enum amdgpu_interrupt_state state)
2996 {
2997 	u32 cp_int_cntl;
2998 
2999 	switch (state) {
3000 	case AMDGPU_IRQ_STATE_DISABLE:
3001 		cp_int_cntl = RREG32(CP_INT_CNTL_RING0);
3002 		cp_int_cntl &= ~CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK;
3003 		WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3004 		break;
3005 	case AMDGPU_IRQ_STATE_ENABLE:
3006 		cp_int_cntl = RREG32(CP_INT_CNTL_RING0);
3007 		cp_int_cntl |= CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK;
3008 		WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3009 		break;
3010 	default:
3011 		break;
3012 	}
3013 }
3014 
3015 static void gfx_v6_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
3016 						     int ring,
3017 						     enum amdgpu_interrupt_state state)
3018 {
3019 	u32 cp_int_cntl;
3020 	switch (state){
3021 	case AMDGPU_IRQ_STATE_DISABLE:
3022 		if (ring == 0) {
3023 			cp_int_cntl = RREG32(CP_INT_CNTL_RING1);
3024 			cp_int_cntl &= ~CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK;
3025 			WREG32(CP_INT_CNTL_RING1, cp_int_cntl);
3026 			break;
3027 		} else {
3028 			cp_int_cntl = RREG32(CP_INT_CNTL_RING2);
3029 			cp_int_cntl &= ~CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK;
3030 			WREG32(CP_INT_CNTL_RING2, cp_int_cntl);
3031 			break;
3032 
3033 		}
3034 	case AMDGPU_IRQ_STATE_ENABLE:
3035 		if (ring == 0) {
3036 			cp_int_cntl = RREG32(CP_INT_CNTL_RING1);
3037 			cp_int_cntl |= CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK;
3038 			WREG32(CP_INT_CNTL_RING1, cp_int_cntl);
3039 			break;
3040 		} else {
3041 			cp_int_cntl = RREG32(CP_INT_CNTL_RING2);
3042 			cp_int_cntl |= CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK;
3043 			WREG32(CP_INT_CNTL_RING2, cp_int_cntl);
3044 			break;
3045 
3046 		}
3047 
3048 	default:
3049 		BUG();
3050 		break;
3051 
3052 	}
3053 }
3054 
3055 static int gfx_v6_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
3056 					     struct amdgpu_irq_src *src,
3057 					     unsigned type,
3058 					     enum amdgpu_interrupt_state state)
3059 {
3060 	u32 cp_int_cntl;
3061 
3062 	switch (state) {
3063 	case AMDGPU_IRQ_STATE_DISABLE:
3064 		cp_int_cntl = RREG32(CP_INT_CNTL_RING0);
3065 		cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
3066 		WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3067 		break;
3068 	case AMDGPU_IRQ_STATE_ENABLE:
3069 		cp_int_cntl = RREG32(CP_INT_CNTL_RING0);
3070 		cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
3071 		WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3072 		break;
3073 	default:
3074 		break;
3075 	}
3076 
3077 	return 0;
3078 }
3079 
3080 static int gfx_v6_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
3081 					      struct amdgpu_irq_src *src,
3082 					      unsigned type,
3083 					      enum amdgpu_interrupt_state state)
3084 {
3085 	u32 cp_int_cntl;
3086 
3087 	switch (state) {
3088 	case AMDGPU_IRQ_STATE_DISABLE:
3089 		cp_int_cntl = RREG32(CP_INT_CNTL_RING0);
3090 		cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
3091 		WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3092 		break;
3093 	case AMDGPU_IRQ_STATE_ENABLE:
3094 		cp_int_cntl = RREG32(CP_INT_CNTL_RING0);
3095 		cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
3096 		WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3097 		break;
3098 	default:
3099 		break;
3100 	}
3101 
3102 	return 0;
3103 }
3104 
3105 static int gfx_v6_0_set_eop_interrupt_state(struct amdgpu_device *adev,
3106 					    struct amdgpu_irq_src *src,
3107 					    unsigned type,
3108 					    enum amdgpu_interrupt_state state)
3109 {
3110 	switch (type) {
3111 	case AMDGPU_CP_IRQ_GFX_EOP:
3112 		gfx_v6_0_set_gfx_eop_interrupt_state(adev, state);
3113 		break;
3114 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
3115 		gfx_v6_0_set_compute_eop_interrupt_state(adev, 0, state);
3116 		break;
3117 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
3118 		gfx_v6_0_set_compute_eop_interrupt_state(adev, 1, state);
3119 		break;
3120 	default:
3121 		break;
3122 	}
3123 	return 0;
3124 }
3125 
3126 static int gfx_v6_0_eop_irq(struct amdgpu_device *adev,
3127 			    struct amdgpu_irq_src *source,
3128 			    struct amdgpu_iv_entry *entry)
3129 {
3130 	switch (entry->ring_id) {
3131 	case 0:
3132 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
3133 		break;
3134 	case 1:
3135 	case 2:
3136 		amdgpu_fence_process(&adev->gfx.compute_ring[entry->ring_id -1]);
3137 		break;
3138 	default:
3139 		break;
3140 	}
3141 	return 0;
3142 }
3143 
3144 static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev,
3145 				 struct amdgpu_irq_src *source,
3146 				 struct amdgpu_iv_entry *entry)
3147 {
3148 	DRM_ERROR("Illegal register access in command stream\n");
3149 	schedule_work(&adev->reset_work);
3150 	return 0;
3151 }
3152 
3153 static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
3154 				  struct amdgpu_irq_src *source,
3155 				  struct amdgpu_iv_entry *entry)
3156 {
3157 	DRM_ERROR("Illegal instruction in command stream\n");
3158 	schedule_work(&adev->reset_work);
3159 	return 0;
3160 }
3161 
3162 static int gfx_v6_0_set_clockgating_state(void *handle,
3163 					  enum amd_clockgating_state state)
3164 {
3165 	bool gate = false;
3166 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3167 
3168 	if (state == AMD_CG_STATE_GATE)
3169 		gate = true;
3170 
3171 	gfx_v6_0_enable_gui_idle_interrupt(adev, false);
3172 	if (gate) {
3173 		gfx_v6_0_enable_mgcg(adev, true);
3174 		gfx_v6_0_enable_cgcg(adev, true);
3175 	} else {
3176 		gfx_v6_0_enable_cgcg(adev, false);
3177 		gfx_v6_0_enable_mgcg(adev, false);
3178 	}
3179 	gfx_v6_0_enable_gui_idle_interrupt(adev, true);
3180 
3181 	return 0;
3182 }
3183 
3184 static int gfx_v6_0_set_powergating_state(void *handle,
3185 					  enum amd_powergating_state state)
3186 {
3187 	bool gate = false;
3188 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3189 
3190 	if (state == AMD_PG_STATE_GATE)
3191 		gate = true;
3192 
3193 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3194 			      AMD_PG_SUPPORT_GFX_SMG |
3195 			      AMD_PG_SUPPORT_GFX_DMG |
3196 			      AMD_PG_SUPPORT_CP |
3197 			      AMD_PG_SUPPORT_GDS |
3198 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3199 		gfx_v6_0_update_gfx_pg(adev, gate);
3200 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
3201 			gfx_v6_0_enable_cp_pg(adev, gate);
3202 			gfx_v6_0_enable_gds_pg(adev, gate);
3203 		}
3204 	}
3205 
3206 	return 0;
3207 }
3208 
3209 static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
3210 	.name = "gfx_v6_0",
3211 	.early_init = gfx_v6_0_early_init,
3212 	.late_init = NULL,
3213 	.sw_init = gfx_v6_0_sw_init,
3214 	.sw_fini = gfx_v6_0_sw_fini,
3215 	.hw_init = gfx_v6_0_hw_init,
3216 	.hw_fini = gfx_v6_0_hw_fini,
3217 	.suspend = gfx_v6_0_suspend,
3218 	.resume = gfx_v6_0_resume,
3219 	.is_idle = gfx_v6_0_is_idle,
3220 	.wait_for_idle = gfx_v6_0_wait_for_idle,
3221 	.soft_reset = gfx_v6_0_soft_reset,
3222 	.set_clockgating_state = gfx_v6_0_set_clockgating_state,
3223 	.set_powergating_state = gfx_v6_0_set_powergating_state,
3224 };
3225 
3226 static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
3227 	.type = AMDGPU_RING_TYPE_GFX,
3228 	.align_mask = 0xff,
3229 	.nop = 0x80000000,
3230 	.get_rptr = gfx_v6_0_ring_get_rptr,
3231 	.get_wptr = gfx_v6_0_ring_get_wptr,
3232 	.set_wptr = gfx_v6_0_ring_set_wptr_gfx,
3233 	.emit_frame_size =
3234 		5 + /* gfx_v6_0_ring_emit_hdp_flush */
3235 		5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
3236 		14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
3237 		7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
3238 		17 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
3239 		3, /* gfx_v6_ring_emit_cntxcntl */
3240 	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
3241 	.emit_ib = gfx_v6_0_ring_emit_ib,
3242 	.emit_fence = gfx_v6_0_ring_emit_fence,
3243 	.emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
3244 	.emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
3245 	.emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
3246 	.emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
3247 	.test_ring = gfx_v6_0_ring_test_ring,
3248 	.test_ib = gfx_v6_0_ring_test_ib,
3249 	.insert_nop = amdgpu_ring_insert_nop,
3250 	.emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
3251 };
3252 
3253 static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
3254 	.type = AMDGPU_RING_TYPE_COMPUTE,
3255 	.align_mask = 0xff,
3256 	.nop = 0x80000000,
3257 	.get_rptr = gfx_v6_0_ring_get_rptr,
3258 	.get_wptr = gfx_v6_0_ring_get_wptr,
3259 	.set_wptr = gfx_v6_0_ring_set_wptr_compute,
3260 	.emit_frame_size =
3261 		5 + /* gfx_v6_0_ring_emit_hdp_flush */
3262 		5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
3263 		7 + /* gfx_v6_0_ring_emit_pipeline_sync */
3264 		17 + /* gfx_v6_0_ring_emit_vm_flush */
3265 		14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
3266 	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
3267 	.emit_ib = gfx_v6_0_ring_emit_ib,
3268 	.emit_fence = gfx_v6_0_ring_emit_fence,
3269 	.emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
3270 	.emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
3271 	.emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
3272 	.emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
3273 	.test_ring = gfx_v6_0_ring_test_ring,
3274 	.test_ib = gfx_v6_0_ring_test_ib,
3275 	.insert_nop = amdgpu_ring_insert_nop,
3276 };
3277 
3278 static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
3279 {
3280 	int i;
3281 
3282 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3283 		adev->gfx.gfx_ring[i].funcs = &gfx_v6_0_ring_funcs_gfx;
3284 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
3285 		adev->gfx.compute_ring[i].funcs = &gfx_v6_0_ring_funcs_compute;
3286 }
3287 
3288 static const struct amdgpu_irq_src_funcs gfx_v6_0_eop_irq_funcs = {
3289 	.set = gfx_v6_0_set_eop_interrupt_state,
3290 	.process = gfx_v6_0_eop_irq,
3291 };
3292 
3293 static const struct amdgpu_irq_src_funcs gfx_v6_0_priv_reg_irq_funcs = {
3294 	.set = gfx_v6_0_set_priv_reg_fault_state,
3295 	.process = gfx_v6_0_priv_reg_irq,
3296 };
3297 
3298 static const struct amdgpu_irq_src_funcs gfx_v6_0_priv_inst_irq_funcs = {
3299 	.set = gfx_v6_0_set_priv_inst_fault_state,
3300 	.process = gfx_v6_0_priv_inst_irq,
3301 };
3302 
3303 static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev)
3304 {
3305 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
3306 	adev->gfx.eop_irq.funcs = &gfx_v6_0_eop_irq_funcs;
3307 
3308 	adev->gfx.priv_reg_irq.num_types = 1;
3309 	adev->gfx.priv_reg_irq.funcs = &gfx_v6_0_priv_reg_irq_funcs;
3310 
3311 	adev->gfx.priv_inst_irq.num_types = 1;
3312 	adev->gfx.priv_inst_irq.funcs = &gfx_v6_0_priv_inst_irq_funcs;
3313 }
3314 
3315 static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
3316 {
3317 	int i, j, k, counter, active_cu_number = 0;
3318 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
3319 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
3320 
3321 	memset(cu_info, 0, sizeof(*cu_info));
3322 
3323 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3324 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3325 			mask = 1;
3326 			ao_bitmap = 0;
3327 			counter = 0;
3328 			bitmap = gfx_v6_0_get_cu_active_bitmap(adev, i, j);
3329 			cu_info->bitmap[i][j] = bitmap;
3330 
3331 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
3332 				if (bitmap & mask) {
3333 					if (counter < 2)
3334 						ao_bitmap |= mask;
3335 					counter ++;
3336 				}
3337 				mask <<= 1;
3338 			}
3339 			active_cu_number += counter;
3340 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
3341 		}
3342 	}
3343 
3344 	cu_info->number = active_cu_number;
3345 	cu_info->ao_cu_mask = ao_cu_mask;
3346 }
3347 
3348 const struct amdgpu_ip_block_version gfx_v6_0_ip_block =
3349 {
3350 	.type = AMD_IP_BLOCK_TYPE_GFX,
3351 	.major = 6,
3352 	.minor = 0,
3353 	.rev = 0,
3354 	.funcs = &gfx_v6_0_ip_funcs,
3355 };
3356