1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v11_0.h"
34 #include "soc21.h"
35 #include "nvd.h"
36 
37 #include "gc/gc_11_0_0_offset.h"
38 #include "gc/gc_11_0_0_sh_mask.h"
39 #include "smuio/smuio_13_0_6_offset.h"
40 #include "smuio/smuio_13_0_6_sh_mask.h"
41 #include "navi10_enum.h"
42 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
43 
44 #include "soc15.h"
45 #include "soc15d.h"
46 #include "clearstate_gfx11.h"
47 #include "v11_structs.h"
48 #include "gfx_v11_0.h"
49 #include "nbio_v4_3.h"
50 #include "mes_v11_0.h"
51 
52 #define GFX11_NUM_GFX_RINGS		1
53 #define GFX11_MEC_HPD_SIZE	2048
54 
55 #define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
56 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388
57 
58 #define regCGTT_WD_CLK_CTRL		0x5086
59 #define regCGTT_WD_CLK_CTRL_BASE_IDX	1
60 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1	0x4e7e
61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
62 
63 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
64 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
65 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
68 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
69 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
70 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
71 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
72 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
73 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
74 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
75 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
76 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
78 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
79 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
80 
81 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
82 {
83 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
84 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
85 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
86 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
87 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
88 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
89 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
90 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
91 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
92 };
93 
94 #define DEFAULT_SH_MEM_CONFIG \
95 	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
96 	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
97 	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
98 
99 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
100 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
101 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
102 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
103 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
104 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
105 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
106 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
107                                  struct amdgpu_cu_info *cu_info);
108 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
109 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
110 				   u32 sh_num, u32 instance);
111 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
112 
113 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
114 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
115 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
116 				     uint32_t val);
117 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
118 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
119 					   uint16_t pasid, uint32_t flush_type,
120 					   bool all_hub, uint8_t dst_sel);
121 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
122 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
123 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
124 				      bool enable);
125 
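/* Emit a SET_RESOURCES packet on the KIQ ring to hand the given compute
 * queue mask to the KIQ; the GWS, OAC and GDS allocations are left at zero. */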
126 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
127 {
128 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
129 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
130 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
131 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
132 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
133 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
134 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
135 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
136 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
137 }
138 
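/* Emit a MAP_QUEUES packet on the KIQ ring so the KIQ maps @ring's hardware
 * queue; the ME and engine select fields depend on whether @ring is a
 * compute, gfx or MES ring. */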
139 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
140 				 struct amdgpu_ring *ring)
141 {
142 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
143 	uint64_t wptr_addr = ring->wptr_gpu_addr;
144 	uint32_t me = 0, eng_sel = 0;
145 
146 	switch (ring->funcs->type) {
147 	case AMDGPU_RING_TYPE_COMPUTE:
148 		me = 1;
149 		eng_sel = 0;
150 		break;
151 	case AMDGPU_RING_TYPE_GFX:
152 		me = 0;
153 		eng_sel = 4;
154 		break;
155 	case AMDGPU_RING_TYPE_MES:
156 		me = 2;
157 		eng_sel = 5;
158 		break;
159 	default:
160 		WARN_ON(1);
161 	}
162 
163 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
166 			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
167 			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
168 			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
169 			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
170 			  PACKET3_MAP_QUEUES_ME((me)) |
171 			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
172 			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
173 			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
174 			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
175 	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
176 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
177 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
178 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
179 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
180 }
181 
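/* Emit an UNMAP_QUEUES packet for @ring, or hand the request to MES when MES
 * is enabled and the KIQ ring is not schedulable.  For
 * PREEMPT_QUEUES_NO_UNMAP the trailing dwords carry the fence address and
 * sequence number. */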
182 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
183 				   struct amdgpu_ring *ring,
184 				   enum amdgpu_unmap_queues_action action,
185 				   u64 gpu_addr, u64 seq)
186 {
187 	struct amdgpu_device *adev = kiq_ring->adev;
188 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
189 
190 	if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
191 		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
192 		return;
193 	}
194 
195 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* action, queue_sel: 0, eng_sel, num_queues: 1 */
197 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
198 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
199 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
200 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
201 	amdgpu_ring_write(kiq_ring,
202 		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
203 
204 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
205 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
206 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
207 		amdgpu_ring_write(kiq_ring, seq);
208 	} else {
209 		amdgpu_ring_write(kiq_ring, 0);
210 		amdgpu_ring_write(kiq_ring, 0);
211 		amdgpu_ring_write(kiq_ring, 0);
212 	}
213 }
214 
215 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
216 				   struct amdgpu_ring *ring,
217 				   u64 addr,
218 				   u64 seq)
219 {
220 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
221 
222 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
223 	amdgpu_ring_write(kiq_ring,
224 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
225 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
226 			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* doorbell offset, eng_sel */
228 			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
229 			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
230 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
231 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
232 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
233 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
234 }
235 
236 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
237 				uint16_t pasid, uint32_t flush_type,
238 				bool all_hub)
239 {
240 	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
241 }
242 
243 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
244 	.kiq_set_resources = gfx11_kiq_set_resources,
245 	.kiq_map_queues = gfx11_kiq_map_queues,
246 	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
247 	.kiq_query_status = gfx11_kiq_query_status,
248 	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
249 	.set_resources_size = 8,
250 	.map_queues_size = 7,
251 	.unmap_queues_size = 6,
252 	.query_status_size = 7,
253 	.invalidate_tlbs_size = 2,
254 };
255 
256 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
257 {
258 	adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
259 }
260 
261 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
262 {
263 	switch (adev->ip_versions[GC_HWIP][0]) {
264 	case IP_VERSION(11, 0, 1):
265 		soc15_program_register_sequence(adev,
266 						golden_settings_gc_11_0_1,
267 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
268 		break;
269 	default:
270 		break;
271 	}
272 }
273 
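/* Emit a WRITE_DATA packet that writes @val to register @reg, optionally
 * requesting a write confirmation (@wc). */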
274 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
275 				       bool wc, uint32_t reg, uint32_t val)
276 {
277 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
278 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
279 			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
280 	amdgpu_ring_write(ring, reg);
281 	amdgpu_ring_write(ring, 0);
282 	amdgpu_ring_write(ring, val);
283 }
284 
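/* Emit a WAIT_REG_MEM packet that polls a register (@mem_space == 0) or a
 * memory location (@mem_space == 1) until (value & @mask) equals @ref. */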
285 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
286 				  int mem_space, int opt, uint32_t addr0,
287 				  uint32_t addr1, uint32_t ref, uint32_t mask,
288 				  uint32_t inv)
289 {
290 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
291 	amdgpu_ring_write(ring,
292 			  /* memory (1) or register (0) */
293 			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
294 			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
295 			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
296 			   WAIT_REG_MEM_ENGINE(eng_sel)));
297 
298 	if (mem_space)
299 		BUG_ON(addr0 & 0x3); /* Dword align */
300 	amdgpu_ring_write(ring, addr0);
301 	amdgpu_ring_write(ring, addr1);
302 	amdgpu_ring_write(ring, ref);
303 	amdgpu_ring_write(ring, mask);
304 	amdgpu_ring_write(ring, inv); /* poll interval */
305 }
306 
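/* Basic ring sanity test: write 0xDEADBEEF to SCRATCH_REG0 through the ring
 * and poll the register until the value reads back or the timeout expires. */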
307 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
308 {
309 	struct amdgpu_device *adev = ring->adev;
310 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
311 	uint32_t tmp = 0;
312 	unsigned i;
313 	int r;
314 
315 	WREG32(scratch, 0xCAFEDEAD);
316 	r = amdgpu_ring_alloc(ring, 5);
317 	if (r) {
318 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
319 			  ring->idx, r);
320 		return r;
321 	}
322 
323 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
324 		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
325 	} else {
326 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
327 		amdgpu_ring_write(ring, scratch -
328 				  PACKET3_SET_UCONFIG_REG_START);
329 		amdgpu_ring_write(ring, 0xDEADBEEF);
330 	}
331 	amdgpu_ring_commit(ring);
332 
333 	for (i = 0; i < adev->usec_timeout; i++) {
334 		tmp = RREG32(scratch);
335 		if (tmp == 0xDEADBEEF)
336 			break;
337 		if (amdgpu_emu_mode == 1)
338 			msleep(1);
339 		else
340 			udelay(1);
341 	}
342 
343 	if (i >= adev->usec_timeout)
344 		r = -ETIMEDOUT;
345 	return r;
346 }
347 
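/* IB sanity test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot (or the MES context buffer for MES queues) and verify the
 * value lands within @timeout. */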
348 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
349 {
350 	struct amdgpu_device *adev = ring->adev;
351 	struct amdgpu_ib ib;
352 	struct dma_fence *f = NULL;
353 	unsigned index;
354 	uint64_t gpu_addr;
355 	volatile uint32_t *cpu_ptr;
356 	long r;
357 
	/* MES KIQ fw doesn't support indirect buffers for now */
359 	if (adev->enable_mes_kiq &&
360 	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
361 		return 0;
362 
363 	memset(&ib, 0, sizeof(ib));
364 
365 	if (ring->is_mes_queue) {
366 		uint32_t padding, offset;
367 
368 		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
369 		padding = amdgpu_mes_ctx_get_offs(ring,
370 						  AMDGPU_MES_CTX_PADDING_OFFS);
371 
372 		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
373 		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
374 
375 		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
376 		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
377 		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
378 	} else {
379 		r = amdgpu_device_wb_get(adev, &index);
380 		if (r)
381 			return r;
382 
383 		gpu_addr = adev->wb.gpu_addr + (index * 4);
384 		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
385 		cpu_ptr = &adev->wb.wb[index];
386 
387 		r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
388 		if (r) {
389 			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
390 			goto err1;
391 		}
392 	}
393 
394 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
395 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
396 	ib.ptr[2] = lower_32_bits(gpu_addr);
397 	ib.ptr[3] = upper_32_bits(gpu_addr);
398 	ib.ptr[4] = 0xDEADBEEF;
399 	ib.length_dw = 5;
400 
401 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
402 	if (r)
403 		goto err2;
404 
405 	r = dma_fence_wait_timeout(f, false, timeout);
406 	if (r == 0) {
407 		r = -ETIMEDOUT;
408 		goto err2;
409 	} else if (r < 0) {
410 		goto err2;
411 	}
412 
413 	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
414 		r = 0;
415 	else
416 		r = -EINVAL;
417 err2:
418 	if (!ring->is_mes_queue)
419 		amdgpu_ib_free(adev, &ib, NULL);
420 	dma_fence_put(f);
421 err1:
422 	if (!ring->is_mes_queue)
423 		amdgpu_device_wb_free(adev, index);
424 	return r;
425 }
426 
427 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
428 {
429 	release_firmware(adev->gfx.pfp_fw);
430 	adev->gfx.pfp_fw = NULL;
431 	release_firmware(adev->gfx.me_fw);
432 	adev->gfx.me_fw = NULL;
433 	release_firmware(adev->gfx.rlc_fw);
434 	adev->gfx.rlc_fw = NULL;
435 	release_firmware(adev->gfx.mec_fw);
436 	adev->gfx.mec_fw = NULL;
437 
438 	kfree(adev->gfx.rlc.register_list_format);
439 }
440 
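/* Request and validate the PFP, ME, RLC (skipped under SR-IOV) and MEC
 * firmware images.  The PFP header version decides whether the RS64
 * microcode layout (separate instruction and per-pipe stack images) is used. */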
441 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
442 {
443 	char fw_name[40];
444 	char ucode_prefix[30];
445 	int err;
446 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
447 	uint16_t version_major;
448 	uint16_t version_minor;
449 
450 	DRM_DEBUG("\n");
451 
452 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
453 
454 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
455 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
456 	if (err)
457 		goto out;
458 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
459 	if (err)
460 		goto out;
	/* check pfp fw hdr version to decide whether to enable rs64 for gfx11 */
462 	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
463 				(union amdgpu_firmware_header *)
464 				adev->gfx.pfp_fw->data, 2, 0);
465 	if (adev->gfx.rs64_enable) {
466 		dev_info(adev->dev, "CP RS64 enable\n");
467 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
468 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
469 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
470 	} else {
471 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
472 	}
473 
474 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
475 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
476 	if (err)
477 		goto out;
478 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
479 	if (err)
480 		goto out;
481 	if (adev->gfx.rs64_enable) {
482 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
483 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
484 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
485 	} else {
486 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
487 	}
488 
489 	if (!amdgpu_sriov_vf(adev)) {
490 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
491 		err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
492 		if (err)
493 			goto out;
494 		err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
495 		if (err)
496 			goto out;
497 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
498 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
499 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
500 		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
501 		if (err)
502 			goto out;
503 	}
504 
505 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
506 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
507 	if (err)
508 		goto out;
509 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
510 	if (err)
511 		goto out;
512 	if (adev->gfx.rs64_enable) {
513 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
514 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
515 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
516 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
517 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
518 	} else {
519 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
520 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
521 	}
522 
523 	/* only one MEC for gfx 11.0.0. */
524 	adev->gfx.mec2_fw = NULL;
525 
526 out:
527 	if (err) {
528 		dev_err(adev->dev,
529 			"gfx11: Failed to init firmware \"%s\"\n",
530 			fw_name);
531 		release_firmware(adev->gfx.pfp_fw);
532 		adev->gfx.pfp_fw = NULL;
533 		release_firmware(adev->gfx.me_fw);
534 		adev->gfx.me_fw = NULL;
535 		release_firmware(adev->gfx.rlc_fw);
536 		adev->gfx.rlc_fw = NULL;
537 		release_firmware(adev->gfx.mec_fw);
538 		adev->gfx.mec_fw = NULL;
539 	}
540 
541 	return err;
542 }
543 
544 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
545 {
546 	const struct psp_firmware_header_v1_0 *toc_hdr;
547 	int err = 0;
548 	char fw_name[40];
549 	char ucode_prefix[30];
550 
551 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
552 
553 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
554 	err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
555 	if (err)
556 		goto out;
557 
558 	err = amdgpu_ucode_validate(adev->psp.toc_fw);
559 	if (err)
560 		goto out;
561 
562 	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
563 	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
564 	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
565 	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
566 	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
567 				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
568 	return 0;
569 out:
570 	dev_err(adev->dev, "Failed to load TOC microcode\n");
571 	release_firmware(adev->psp.toc_fw);
572 	adev->psp.toc_fw = NULL;
573 	return err;
574 }
575 
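/* Count the dwords needed for the clear state buffer: preamble begin/end,
 * context control, every SECT_CONTEXT register extent, the
 * PA_SC_TILE_STEERING_OVERRIDE write and the trailing CLEAR_STATE packet. */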
576 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
577 {
578 	u32 count = 0;
579 	const struct cs_section_def *sect = NULL;
580 	const struct cs_extent_def *ext = NULL;
581 
582 	/* begin clear state */
583 	count += 2;
584 	/* context control state */
585 	count += 3;
586 
587 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
588 		for (ext = sect->section; ext->extent != NULL; ++ext) {
589 			if (sect->id == SECT_CONTEXT)
590 				count += 2 + ext->reg_count;
591 			else
592 				return 0;
593 		}
594 	}
595 
596 	/* set PA_SC_TILE_STEERING_OVERRIDE */
597 	count += 3;
598 	/* end clear state */
599 	count += 2;
600 	/* clear state */
601 	count += 2;
602 
603 	return count;
604 }
605 
606 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
607 				    volatile u32 *buffer)
608 {
609 	u32 count = 0, i;
610 	const struct cs_section_def *sect = NULL;
611 	const struct cs_extent_def *ext = NULL;
612 	int ctx_reg_offset;
613 
614 	if (adev->gfx.rlc.cs_data == NULL)
615 		return;
616 	if (buffer == NULL)
617 		return;
618 
619 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
620 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
621 
622 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
623 	buffer[count++] = cpu_to_le32(0x80000000);
624 	buffer[count++] = cpu_to_le32(0x80000000);
625 
626 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
627 		for (ext = sect->section; ext->extent != NULL; ++ext) {
628 			if (sect->id == SECT_CONTEXT) {
629 				buffer[count++] =
630 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
631 				buffer[count++] = cpu_to_le32(ext->reg_index -
632 						PACKET3_SET_CONTEXT_REG_START);
633 				for (i = 0; i < ext->reg_count; i++)
634 					buffer[count++] = cpu_to_le32(ext->extent[i]);
635 			} else {
636 				return;
637 			}
638 		}
639 	}
640 
641 	ctx_reg_offset =
642 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
643 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
644 	buffer[count++] = cpu_to_le32(ctx_reg_offset);
645 	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
646 
647 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
648 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
649 
650 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
651 	buffer[count++] = cpu_to_le32(0);
652 }
653 
654 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
655 {
656 	/* clear state block */
657 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
658 			&adev->gfx.rlc.clear_state_gpu_addr,
659 			(void **)&adev->gfx.rlc.cs_ptr);
660 
661 	/* jump table block */
662 	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
663 			&adev->gfx.rlc.cp_table_gpu_addr,
664 			(void **)&adev->gfx.rlc.cp_table_ptr);
665 }
666 
667 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
668 {
669 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
670 
671 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
672 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
673 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
674 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
675 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
676 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
677 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
678 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
679 	adev->gfx.rlc.rlcg_reg_access_supported = true;
680 }
681 
682 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
683 {
684 	const struct cs_section_def *cs_data;
685 	int r;
686 
687 	adev->gfx.rlc.cs_data = gfx11_cs_data;
688 
689 	cs_data = adev->gfx.rlc.cs_data;
690 
691 	if (cs_data) {
692 		/* init clear state block */
693 		r = amdgpu_gfx_rlc_init_csb(adev);
694 		if (r)
695 			return r;
696 	}
697 
698 	/* init spm vmid with 0xf */
699 	if (adev->gfx.rlc.funcs->update_spm_vmid)
700 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
701 
702 	return 0;
703 }
704 
705 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
706 {
707 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
708 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
709 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
710 }
711 
712 static int gfx_v11_0_me_init(struct amdgpu_device *adev)
713 {
714 	int r;
715 
716 	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
717 
718 	amdgpu_gfx_graphics_queue_acquire(adev);
719 
720 	r = gfx_v11_0_init_microcode(adev);
721 	if (r)
722 		DRM_ERROR("Failed to load gfx firmware!\n");
723 
724 	return r;
725 }
726 
727 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
728 {
729 	int r;
730 	u32 *hpd;
731 	size_t mec_hpd_size;
732 
733 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
734 
735 	/* take ownership of the relevant compute queues */
736 	amdgpu_gfx_compute_queue_acquire(adev);
737 	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
738 
739 	if (mec_hpd_size) {
740 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
741 					      AMDGPU_GEM_DOMAIN_GTT,
742 					      &adev->gfx.mec.hpd_eop_obj,
743 					      &adev->gfx.mec.hpd_eop_gpu_addr,
744 					      (void **)&hpd);
745 		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
747 			gfx_v11_0_mec_fini(adev);
748 			return r;
749 		}
750 
751 		memset(hpd, 0, mec_hpd_size);
752 
753 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
754 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
755 	}
756 
757 	return 0;
758 }
759 
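/* Read a single per-wave register through the SQ_IND_INDEX / SQ_IND_DATA
 * indirect register interface. */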
760 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
761 {
762 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
763 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
764 		(address << SQ_IND_INDEX__INDEX__SHIFT));
765 	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
766 }
767 
768 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
769 			   uint32_t thread, uint32_t regno,
770 			   uint32_t num, uint32_t *out)
771 {
772 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
773 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
774 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
775 		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
776 		(SQ_IND_INDEX__AUTO_INCR_MASK));
777 	while (num--)
778 		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
779 }
780 
781 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
782 {
783 	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
784 	 * field when performing a select_se_sh so it should be
785 	 * zero here */
786 	WARN_ON(simd != 0);
787 
788 	/* type 2 wave data */
789 	dst[(*no_fields)++] = 2;
790 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
791 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
792 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
793 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
794 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
795 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
796 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
797 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
798 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
799 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
800 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
801 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
802 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
803 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
804 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
805 }
806 
807 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
808 				     uint32_t wave, uint32_t start,
809 				     uint32_t size, uint32_t *dst)
810 {
811 	WARN_ON(simd != 0);
812 
813 	wave_read_regs(
814 		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
815 		dst);
816 }
817 
818 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
819 				      uint32_t wave, uint32_t thread,
820 				      uint32_t start, uint32_t size,
821 				      uint32_t *dst)
822 {
823 	wave_read_regs(
824 		adev, wave, thread,
825 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
826 }
827 
static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
				       u32 me, u32 pipe, u32 q, u32 vm)
830 {
831 	soc21_grbm_select(adev, me, pipe, q, vm);
832 }
833 
834 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
835 	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
836 	.select_se_sh = &gfx_v11_0_select_se_sh,
837 	.read_wave_data = &gfx_v11_0_read_wave_data,
838 	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
839 	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
840 	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
841 	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
842 };
843 
844 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
845 {
846 	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
847 
848 	switch (adev->ip_versions[GC_HWIP][0]) {
849 	case IP_VERSION(11, 0, 0):
850 	case IP_VERSION(11, 0, 2):
851 	case IP_VERSION(11, 0, 3):
852 		adev->gfx.config.max_hw_contexts = 8;
853 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
854 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
855 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
856 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
857 		break;
858 	case IP_VERSION(11, 0, 1):
859 		adev->gfx.config.max_hw_contexts = 8;
860 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
861 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
862 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
863 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
864 		break;
865 	default:
866 		BUG();
867 		break;
868 	}
869 
870 	return 0;
871 }
872 
873 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
874 				   int me, int pipe, int queue)
875 {
876 	int r;
877 	struct amdgpu_ring *ring;
878 	unsigned int irq_type;
879 
880 	ring = &adev->gfx.gfx_ring[ring_id];
881 
882 	ring->me = me;
883 	ring->pipe = pipe;
884 	ring->queue = queue;
885 
886 	ring->ring_obj = NULL;
887 	ring->use_doorbell = true;
888 
889 	if (!ring_id)
890 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
891 	else
892 		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
893 	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
894 
895 	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
896 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
897 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
898 	if (r)
899 		return r;
900 	return 0;
901 }
902 
903 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
904 				       int mec, int pipe, int queue)
905 {
906 	int r;
907 	unsigned irq_type;
908 	struct amdgpu_ring *ring;
909 	unsigned int hw_prio;
910 
911 	ring = &adev->gfx.compute_ring[ring_id];
912 
913 	/* mec0 is me1 */
914 	ring->me = mec + 1;
915 	ring->pipe = pipe;
916 	ring->queue = queue;
917 
918 	ring->ring_obj = NULL;
919 	ring->use_doorbell = true;
920 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
921 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
922 				+ (ring_id * GFX11_MEC_HPD_SIZE);
923 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
924 
925 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
926 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
927 		+ ring->pipe;
928 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
929 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
930 	/* type-2 packets are deprecated on MEC, use type-3 instead */
931 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
932 			     hw_prio, NULL);
933 	if (r)
934 		return r;
935 
936 	return 0;
937 }
938 
939 static struct {
940 	SOC21_FIRMWARE_ID	id;
941 	unsigned int		offset;
942 	unsigned int		size;
943 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
944 
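/* Walk the RLC table of contents and record each firmware ID's offset and
 * size in bytes in rlc_autoload_info[]. */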
945 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
946 {
947 	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
948 
949 	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
950 			(ucode->id < SOC21_FIRMWARE_ID_MAX)) {
951 		rlc_autoload_info[ucode->id].id = ucode->id;
952 		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
953 		rlc_autoload_info[ucode->id].size = ucode->size * 4;
954 
955 		ucode++;
956 	}
957 }
958 
959 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
960 {
961 	uint32_t total_size = 0;
962 	SOC21_FIRMWARE_ID id;
963 
964 	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
965 
966 	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
967 		total_size += rlc_autoload_info[id].size;
968 
	/* In case the offsets in the rlc toc are aligned/padded, make sure the
	 * total size also covers the last entry */
970 	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
971 		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
972 			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
973 
974 	return total_size;
975 }
976 
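/* Allocate the VRAM buffer that holds all firmware images for RLC backdoor
 * autoload, sized from the parsed TOC. */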
977 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
978 {
979 	int r;
980 	uint32_t total_size;
981 
982 	total_size = gfx_v11_0_calc_toc_total_size(adev);
983 
984 	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
985 			AMDGPU_GEM_DOMAIN_VRAM,
986 			&adev->gfx.rlc.rlc_autoload_bo,
987 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
988 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
989 
990 	if (r) {
991 		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
992 		return r;
993 	}
994 
995 	return 0;
996 }
997 
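/* Copy one firmware image into its slot in the autoload buffer, zero-padding
 * up to the slot size, and flag it in the autoload mask (the RS64 PFP and ME
 * images are not flagged here). */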
998 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
999 					      SOC21_FIRMWARE_ID id,
					      const void *fw_data,
1001 					      uint32_t fw_size,
1002 					      uint32_t *fw_autoload_mask)
1003 {
1004 	uint32_t toc_offset;
1005 	uint32_t toc_fw_size;
1006 	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1007 
1008 	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1009 		return;
1010 
1011 	toc_offset = rlc_autoload_info[id].offset;
1012 	toc_fw_size = rlc_autoload_info[id].size;
1013 
1014 	if (fw_size == 0)
1015 		fw_size = toc_fw_size;
1016 
1017 	if (fw_size > toc_fw_size)
1018 		fw_size = toc_fw_size;
1019 
1020 	memcpy(ptr + toc_offset, fw_data, fw_size);
1021 
1022 	if (fw_size < toc_fw_size)
1023 		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1024 
1025 	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1026 		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
1027 }
1028 
1029 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1030 							uint32_t *fw_autoload_mask)
1031 {
1032 	void *data;
1033 	uint32_t size;
1034 	uint64_t *toc_ptr;
1035 
1036 	*(uint64_t *)fw_autoload_mask |= 0x1;
1037 
1038 	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1039 
1040 	data = adev->psp.toc.start_addr;
1041 	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1042 
1043 	toc_ptr = (uint64_t *)data + size / 8 - 1;
1044 	*toc_ptr = *(uint64_t *)fw_autoload_mask;
1045 
1046 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1047 					data, size, fw_autoload_mask);
1048 }
1049 
1050 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1051 							uint32_t *fw_autoload_mask)
1052 {
1053 	const __le32 *fw_data;
1054 	uint32_t fw_size;
1055 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1056 	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1057 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1058 	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1059 	uint16_t version_major, version_minor;
1060 
1061 	if (adev->gfx.rs64_enable) {
1062 		/* pfp ucode */
1063 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1064 			adev->gfx.pfp_fw->data;
1065 		/* instruction */
1066 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1067 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1068 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1069 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1070 						fw_data, fw_size, fw_autoload_mask);
1071 		/* data */
1072 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1073 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1074 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1075 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1076 						fw_data, fw_size, fw_autoload_mask);
1077 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1078 						fw_data, fw_size, fw_autoload_mask);
1079 		/* me ucode */
1080 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1081 			adev->gfx.me_fw->data;
1082 		/* instruction */
1083 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1084 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1085 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1086 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1087 						fw_data, fw_size, fw_autoload_mask);
1088 		/* data */
1089 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1090 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1091 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1092 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1093 						fw_data, fw_size, fw_autoload_mask);
1094 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1095 						fw_data, fw_size, fw_autoload_mask);
1096 		/* mec ucode */
1097 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1098 			adev->gfx.mec_fw->data;
1099 		/* instruction */
1100 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1101 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1102 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1103 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1104 						fw_data, fw_size, fw_autoload_mask);
1105 		/* data */
1106 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1107 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1108 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1109 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1110 						fw_data, fw_size, fw_autoload_mask);
1111 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1112 						fw_data, fw_size, fw_autoload_mask);
1113 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1114 						fw_data, fw_size, fw_autoload_mask);
1115 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1116 						fw_data, fw_size, fw_autoload_mask);
1117 	} else {
1118 		/* pfp ucode */
1119 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1120 			adev->gfx.pfp_fw->data;
1121 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1122 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1123 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1124 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1125 						fw_data, fw_size, fw_autoload_mask);
1126 
1127 		/* me ucode */
1128 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1129 			adev->gfx.me_fw->data;
1130 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1131 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1132 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1133 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1134 						fw_data, fw_size, fw_autoload_mask);
1135 
1136 		/* mec ucode */
1137 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1138 			adev->gfx.mec_fw->data;
1139 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1140 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			le32_to_cpu(cp_hdr->jt_size) * 4;
1143 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1144 						fw_data, fw_size, fw_autoload_mask);
1145 	}
1146 
1147 	/* rlc ucode */
1148 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1149 		adev->gfx.rlc_fw->data;
1150 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1151 			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1152 	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1153 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1154 					fw_data, fw_size, fw_autoload_mask);
1155 
1156 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1157 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1158 	if (version_major == 2) {
1159 		if (version_minor >= 2) {
1160 			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1161 
1162 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1163 					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1164 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1165 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1166 					fw_data, fw_size, fw_autoload_mask);
1167 
1168 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1169 					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1170 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1171 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1172 					fw_data, fw_size, fw_autoload_mask);
1173 		}
1174 	}
1175 }
1176 
1177 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1178 							uint32_t *fw_autoload_mask)
1179 {
1180 	const __le32 *fw_data;
1181 	uint32_t fw_size;
1182 	const struct sdma_firmware_header_v2_0 *sdma_hdr;
1183 
1184 	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1185 		adev->sdma.instance[0].fw->data;
1186 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1187 			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1188 	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1189 
1190 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1191 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1192 
1193 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1194 			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1195 	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1196 
1197 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1198 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1199 }
1200 
1201 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1202 							uint32_t *fw_autoload_mask)
1203 {
1204 	const __le32 *fw_data;
1205 	unsigned fw_size;
1206 	const struct mes_firmware_header_v1_0 *mes_hdr;
1207 	int pipe, ucode_id, data_id;
1208 
1209 	for (pipe = 0; pipe < 2; pipe++) {
		if (pipe == 0) {
1211 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1212 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1213 		} else {
1214 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1215 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1216 		}
1217 
1218 		mes_hdr = (const struct mes_firmware_header_v1_0 *)
1219 			adev->mes.fw[pipe]->data;
1220 
1221 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1222 				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1223 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1224 
1225 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1226 				ucode_id, fw_data, fw_size, fw_autoload_mask);
1227 
1228 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1229 				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1230 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1231 
1232 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1233 				data_id, fw_data, fw_size, fw_autoload_mask);
1234 	}
1235 }
1236 
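/* RLC backdoor autoload: copy the SDMA, CP/gfx, MES and TOC images into the
 * autoload buffer, point the IMU bootloader registers at the RLC_G image,
 * then load and start the IMU and disable GPA mode. */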
1237 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1238 {
1239 	uint32_t rlc_g_offset, rlc_g_size;
1240 	uint64_t gpu_addr;
1241 	uint32_t autoload_fw_id[2];
1242 
1243 	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
1244 
1245 	/* RLC autoload sequence 2: copy ucode */
1246 	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1247 	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1248 	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1249 	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1250 
1251 	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1252 	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1253 	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1254 
1255 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1256 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1257 
1258 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1259 
1260 	/* RLC autoload sequence 3: load IMU fw */
1261 	if (adev->gfx.imu.funcs->load_microcode)
1262 		adev->gfx.imu.funcs->load_microcode(adev);
	/* RLC autoload sequence 4: init IMU fw */
1264 	if (adev->gfx.imu.funcs->setup_imu)
1265 		adev->gfx.imu.funcs->setup_imu(adev);
1266 	if (adev->gfx.imu.funcs->start_imu)
1267 		adev->gfx.imu.funcs->start_imu(adev);
1268 
	/* RLC autoload sequence 5: disable gpa mode */
1270 	gfx_v11_0_disable_gpa_mode(adev);
1271 
1272 	return 0;
1273 }
1274 
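/* Software init: set the per-ASIC ME/MEC topology, register the EOP and
 * privileged fault interrupt sources, load microcode, create the RLC, MEC,
 * gfx and compute rings and (unless MES KIQ is used) the KIQ, and allocate
 * the autoload buffer when RLC backdoor loading is in use. */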
1275 static int gfx_v11_0_sw_init(void *handle)
1276 {
1277 	int i, j, k, r, ring_id = 0;
1278 	struct amdgpu_kiq *kiq;
1279 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1280 
1281 	adev->gfxhub.funcs->init(adev);
1282 
1283 	switch (adev->ip_versions[GC_HWIP][0]) {
1284 	case IP_VERSION(11, 0, 0):
1285 	case IP_VERSION(11, 0, 1):
1286 	case IP_VERSION(11, 0, 2):
1287 	case IP_VERSION(11, 0, 3):
1288 		adev->gfx.me.num_me = 1;
1289 		adev->gfx.me.num_pipe_per_me = 1;
1290 		adev->gfx.me.num_queue_per_pipe = 1;
1291 		adev->gfx.mec.num_mec = 2;
1292 		adev->gfx.mec.num_pipe_per_mec = 4;
1293 		adev->gfx.mec.num_queue_per_pipe = 4;
1294 		break;
1295 	default:
1296 		adev->gfx.me.num_me = 1;
1297 		adev->gfx.me.num_pipe_per_me = 1;
1298 		adev->gfx.me.num_queue_per_pipe = 1;
1299 		adev->gfx.mec.num_mec = 1;
1300 		adev->gfx.mec.num_pipe_per_mec = 4;
1301 		adev->gfx.mec.num_queue_per_pipe = 8;
1302 		break;
1303 	}
1304 
1305 	/* EOP Event */
1306 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1307 			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1308 			      &adev->gfx.eop_irq);
1309 	if (r)
1310 		return r;
1311 
1312 	/* Privileged reg */
1313 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1314 			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1315 			      &adev->gfx.priv_reg_irq);
1316 	if (r)
1317 		return r;
1318 
1319 	/* Privileged inst */
1320 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1321 			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1322 			      &adev->gfx.priv_inst_irq);
1323 	if (r)
1324 		return r;
1325 
1326 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1327 
1328 	if (adev->gfx.imu.funcs) {
1329 		if (adev->gfx.imu.funcs->init_microcode) {
1330 			r = adev->gfx.imu.funcs->init_microcode(adev);
1331 			if (r)
1332 				DRM_ERROR("Failed to load imu firmware!\n");
1333 		}
1334 	}
1335 
1336 	r = gfx_v11_0_me_init(adev);
1337 	if (r)
1338 		return r;
1339 
1340 	r = gfx_v11_0_rlc_init(adev);
1341 	if (r) {
1342 		DRM_ERROR("Failed to init rlc BOs!\n");
1343 		return r;
1344 	}
1345 
1346 	r = gfx_v11_0_mec_init(adev);
1347 	if (r) {
1348 		DRM_ERROR("Failed to init MEC BOs!\n");
1349 		return r;
1350 	}
1351 
1352 	/* set up the gfx ring */
1353 	for (i = 0; i < adev->gfx.me.num_me; i++) {
1354 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1355 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1356 				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1357 					continue;
1358 
1359 				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1360 							    i, k, j);
1361 				if (r)
1362 					return r;
1363 				ring_id++;
1364 			}
1365 		}
1366 	}
1367 
1368 	ring_id = 0;
1369 	/* set up the compute queues - allocate horizontally across pipes */
1370 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1371 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1372 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1373 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
1374 								     j))
1375 					continue;
1376 
1377 				r = gfx_v11_0_compute_ring_init(adev, ring_id,
1378 								i, k, j);
1379 				if (r)
1380 					return r;
1381 
1382 				ring_id++;
1383 			}
1384 		}
1385 	}
1386 
1387 	if (!adev->enable_mes_kiq) {
1388 		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
1389 		if (r) {
1390 			DRM_ERROR("Failed to init KIQ BOs!\n");
1391 			return r;
1392 		}
1393 
1394 		kiq = &adev->gfx.kiq;
1395 		r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1396 		if (r)
1397 			return r;
1398 	}
1399 
1400 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
1401 	if (r)
1402 		return r;
1403 
1404 	/* allocate visible FB for rlc auto-loading fw */
1405 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1406 		r = gfx_v11_0_init_toc_microcode(adev);
1407 		if (r)
1408 			dev_err(adev->dev, "Failed to load toc firmware!\n");
1409 		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1410 		if (r)
1411 			return r;
1412 	}
1413 
1414 	r = gfx_v11_0_gpu_early_init(adev);
1415 	if (r)
1416 		return r;
1417 
1418 	return 0;
1419 }
1420 
1421 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1422 {
1423 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1424 			      &adev->gfx.pfp.pfp_fw_gpu_addr,
1425 			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
1426 
1427 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1428 			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1429 			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1430 }
1431 
1432 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1433 {
1434 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1435 			      &adev->gfx.me.me_fw_gpu_addr,
1436 			      (void **)&adev->gfx.me.me_fw_ptr);
1437 
1438 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1439 			       &adev->gfx.me.me_fw_data_gpu_addr,
1440 			       (void **)&adev->gfx.me.me_fw_data_ptr);
1441 }
1442 
1443 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1444 {
1445 	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1446 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
1447 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
1448 }
1449 
1450 static int gfx_v11_0_sw_fini(void *handle)
1451 {
1452 	int i;
1453 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1454 
1455 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1456 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1457 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1458 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1459 
1460 	amdgpu_gfx_mqd_sw_fini(adev);
1461 
1462 	if (!adev->enable_mes_kiq) {
1463 		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
1464 		amdgpu_gfx_kiq_fini(adev);
1465 	}
1466 
1467 	gfx_v11_0_pfp_fini(adev);
1468 	gfx_v11_0_me_fini(adev);
1469 	gfx_v11_0_rlc_fini(adev);
1470 	gfx_v11_0_mec_fini(adev);
1471 
1472 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1473 		gfx_v11_0_rlc_autoload_buffer_fini(adev);
1474 
1475 	gfx_v11_0_free_microcode(adev);
1476 
1477 	return 0;
1478 }
1479 
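/* Program GRBM_GFX_INDEX to target a specific shader engine / shader array /
 * instance, or to broadcast when 0xffffffff is passed for a field. */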
1480 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1481 				   u32 sh_num, u32 instance)
1482 {
1483 	u32 data;
1484 
1485 	if (instance == 0xffffffff)
1486 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1487 				     INSTANCE_BROADCAST_WRITES, 1);
1488 	else
1489 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1490 				     instance);
1491 
1492 	if (se_num == 0xffffffff)
1493 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1494 				     1);
1495 	else
1496 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1497 
1498 	if (sh_num == 0xffffffff)
1499 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1500 				     1);
1501 	else
1502 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1503 
1504 	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1505 }
1506 
1507 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1508 {
1509 	u32 data, mask;
1510 
1511 	data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1512 	data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1513 
1514 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1515 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1516 
1517 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1518 					 adev->gfx.config.max_sh_per_se);
1519 
1520 	return (~data) & mask;
1521 }
1522 
1523 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1524 {
1525 	int i, j;
1526 	u32 data;
1527 	u32 active_rbs = 0;
1528 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1529 					adev->gfx.config.max_sh_per_se;
1530 
1531 	mutex_lock(&adev->grbm_idx_mutex);
1532 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1533 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1534 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
1535 			data = gfx_v11_0_get_rb_active_bitmap(adev);
1536 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1537 					       rb_bitmap_width_per_sh);
1538 		}
1539 	}
1540 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1541 	mutex_unlock(&adev->grbm_idx_mutex);
1542 
1543 	adev->gfx.config.backend_enable_mask = active_rbs;
1544 	adev->gfx.config.num_rbs = hweight32(active_rbs);
1545 }
1546 
1547 #define DEFAULT_SH_MEM_BASES	(0x6000)
1548 #define LDS_APP_BASE           0x1
1549 #define SCRATCH_APP_BASE       0x2
1550 
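/* Program the compute (KFD) VMIDs: fixed LDS/scratch aperture bases, the
 * default SH_MEM_CONFIG, the per-VMID trap enable, and empty GDS/GWS/OA
 * allocations. */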
1551 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1552 {
1553 	int i;
1554 	uint32_t sh_mem_bases;
1555 	uint32_t data;
1556 
1557 	/*
1558 	 * Configure apertures:
1559 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1560 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1561 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1562 	 */
1563 	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1564 			SCRATCH_APP_BASE;
1565 
1566 	mutex_lock(&adev->srbm_mutex);
1567 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1568 		soc21_grbm_select(adev, 0, 0, 0, i);
1569 		/* CP and shaders */
1570 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1571 		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1572 
1573 		/* Enable trap for each kfd vmid. */
1574 		data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1575 		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
		WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1576 	}
1577 	soc21_grbm_select(adev, 0, 0, 0, 0);
1578 	mutex_unlock(&adev->srbm_mutex);
1579 
1580 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
1581 	   access. These should be enabled by FW for target VMIDs. */
1582 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1583 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1584 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1585 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1586 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1587 	}
1588 }
1589 
1590 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1591 {
1592 	int vmid;
1593 
1594 	/*
1595 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1596 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
1597 	 * the driver can enable them for graphics. VMID0 should maintain
1598 	 * access so that HWS firmware can save/restore entries.
1599 	 */
1600 	for (vmid = 1; vmid < 16; vmid++) {
1601 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1602 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1603 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1604 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1605 	}
1606 }
1607 
1608 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1609 {
1610 	/* TODO: harvest feature to be added later. */
1611 }
1612 
1613 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1614 {
1615 	/* TCCs are global (not instanced). */
1616 	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1617 			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1618 
1619 	adev->gfx.config.tcc_disabled_mask =
1620 		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
1621 		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
1622 }
1623 
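/*
 * One-time GC constant setup at hardware init: RB/CU/TCC discovery and
 * the per-VMID SH_MEM aperture programming for gfx and compute.
 */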
1624 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
1625 {
1626 	u32 tmp;
1627 	int i;
1628 
1629 	if (!amdgpu_sriov_vf(adev))
1630 		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1631 
1632 	gfx_v11_0_setup_rb(adev);
1633 	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1634 	gfx_v11_0_get_tcc_info(adev);
1635 	adev->gfx.config.pa_sc_tile_steering_override = 0;
1636 
1637 	/* XXX SH_MEM regs */
1638 	/* where to put LDS, scratch, GPUVM in FSA64 space */
1639 	mutex_lock(&adev->srbm_mutex);
1640 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
1641 		soc21_grbm_select(adev, 0, 0, 0, i);
1642 		/* CP and shaders */
1643 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1644 		if (i != 0) {
1645 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1646 				(adev->gmc.private_aperture_start >> 48));
1647 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1648 				(adev->gmc.shared_aperture_start >> 48));
1649 			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1650 		}
1651 	}
1652 	soc21_grbm_select(adev, 0, 0, 0, 0);
1653 
1654 	mutex_unlock(&adev->srbm_mutex);
1655 
1656 	gfx_v11_0_init_compute_vmid(adev);
1657 	gfx_v11_0_init_gds_vmid(adev);
1658 }
1659 
1660 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1661 					       bool enable)
1662 {
1663 	u32 tmp;
1664 
1665 	if (amdgpu_sriov_vf(adev))
1666 		return;
1667 
1668 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1669 
1670 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1671 			    enable ? 1 : 0);
1672 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1673 			    enable ? 1 : 0);
1674 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1675 			    enable ? 1 : 0);
1676 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1677 			    enable ? 1 : 0);
1678 
1679 	WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1680 }
1681 
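/*
 * Fill the clear state buffer (CSB) and point the RLC at it via the
 * RLC_CSIB_ADDR/LENGTH registers.
 */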
1682 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
1683 {
1684 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1685 
1686 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1687 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
1688 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1689 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1690 	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1691 
1692 	return 0;
1693 }
1694 
1695 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
1696 {
1697 	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1698 
1699 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1700 	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1701 }
1702 
1703 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
1704 {
1705 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1706 	udelay(50);
1707 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1708 	udelay(50);
1709 }
1710 
1711 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1712 					     bool enable)
1713 {
1714 	uint32_t rlc_pg_cntl;
1715 
1716 	rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1717 
1718 	if (!enable) {
1719 		/* RLC_PG_CNTL[23] = 0 (default)
1720 		 * RLC will wait for handshake acks with SMU
1721 		 * GFXOFF will be enabled
1722 		 * RLC_PG_CNTL[23] = 1
1723 		 * RLC will not issue any message to SMU
1724 		 * hence no handshake between SMU & RLC
1725 		 * GFXOFF will be disabled
1726 		 */
1727 		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1728 	} else {
1729 		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
	}
1730 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1731 }
1732 
1733 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
1734 {
1735 	/* TODO: re-enable the rlc & smu handshake once smu
1736 	 * and the gfxoff feature work as expected */
1737 	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1738 		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
1739 
1740 	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1741 	udelay(50);
1742 }
1743 
1744 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
1745 {
1746 	uint32_t tmp;
1747 
1748 	/* enable Save Restore Machine */
1749 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1750 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1751 	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1752 	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1753 }
1754 
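/* Backdoor-load the RLC-G firmware image through the GPM ucode port. */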
1755 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
1756 {
1757 	const struct rlc_firmware_header_v2_0 *hdr;
1758 	const __le32 *fw_data;
1759 	unsigned i, fw_size;
1760 
1761 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1762 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1763 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1764 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1765 
1766 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1767 		     RLCG_UCODE_LOADING_START_ADDRESS);
1768 
1769 	for (i = 0; i < fw_size; i++)
1770 		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1771 			     le32_to_cpup(fw_data++));
1772 
1773 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1774 }
1775 
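/*
 * Backdoor-load the RLC LX6 IRAM and DRAM images (present in RLC
 * firmware headers v2.2 and later) and release the LX6 core from reset.
 */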
1776 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1777 {
1778 	const struct rlc_firmware_header_v2_2 *hdr;
1779 	const __le32 *fw_data;
1780 	unsigned i, fw_size;
1781 	u32 tmp;
1782 
1783 	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1784 
1785 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1786 			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1787 	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1788 
1789 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1790 
1791 	for (i = 0; i < fw_size; i++) {
1792 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1793 			msleep(1);
1794 		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1795 				le32_to_cpup(fw_data++));
1796 	}
1797 
1798 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1799 
1800 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1801 			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1802 	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1803 
1804 	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1805 	for (i = 0; i < fw_size; i++) {
1806 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1807 			msleep(1);
1808 		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1809 				le32_to_cpup(fw_data++));
1810 	}
1811 
1812 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1813 
1814 	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1815 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1816 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1817 	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1818 }
1819 
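/*
 * Backdoor-load the RLC-P and RLC-V images carried in v2.3 RLC firmware
 * headers and enable their respective threads/cores.
 */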
1820 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
1821 {
1822 	const struct rlc_firmware_header_v2_3 *hdr;
1823 	const __le32 *fw_data;
1824 	unsigned i, fw_size;
1825 	u32 tmp;
1826 
1827 	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
1828 
1829 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1830 			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
1831 	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
1832 
1833 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
1834 
1835 	for (i = 0; i < fw_size; i++) {
1836 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1837 			msleep(1);
1838 		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
1839 				le32_to_cpup(fw_data++));
1840 	}
1841 
1842 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
1843 
1844 	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
1845 	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1846 	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
1847 
1848 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1849 			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
1850 	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
1851 
1852 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
1853 
1854 	for (i = 0; i < fw_size; i++) {
1855 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1856 			msleep(1);
1857 		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
1858 				le32_to_cpup(fw_data++));
1859 	}
1860 
1861 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
1862 
1863 	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
1864 	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
1865 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
1866 }
1867 
1868 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
1869 {
1870 	const struct rlc_firmware_header_v2_0 *hdr;
1871 	uint16_t version_major;
1872 	uint16_t version_minor;
1873 
1874 	if (!adev->gfx.rlc_fw)
1875 		return -EINVAL;
1876 
1877 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1878 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
1879 
1880 	version_major = le16_to_cpu(hdr->header.header_version_major);
1881 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
1882 
1883 	if (version_major == 2) {
1884 		gfx_v11_0_load_rlcg_microcode(adev);
1885 		if (amdgpu_dpm == 1) {
1886 			if (version_minor >= 2)
1887 				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
1888 			if (version_minor == 3)
1889 				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
1890 		}
1891 
1892 		return 0;
1893 	}
1894 
1895 	return -EINVAL;
1896 }
1897 
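/*
 * Bring up the RLC for the selected firmware load path: PSP/front-door
 * loading only needs the CSB (plus SRM on bare metal), while the legacy
 * direct path halts the RLC, disables CG/PG, backdoor-loads the
 * microcode and restarts it.
 */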
1898 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
1899 {
1900 	int r;
1901 
1902 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1903 		gfx_v11_0_init_csb(adev);
1904 
1905 		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1906 			gfx_v11_0_rlc_enable_srm(adev);
1907 	} else {
1908 		if (amdgpu_sriov_vf(adev)) {
1909 			gfx_v11_0_init_csb(adev);
1910 			return 0;
1911 		}
1912 
1913 		adev->gfx.rlc.funcs->stop(adev);
1914 
1915 		/* disable CG */
1916 		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
1917 
1918 		/* disable PG */
1919 		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
1920 
1921 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1922 			/* legacy rlc firmware loading */
1923 			r = gfx_v11_0_rlc_load_microcode(adev);
1924 			if (r)
1925 				return r;
1926 		}
1927 
1928 		gfx_v11_0_init_csb(adev);
1929 
1930 		adev->gfx.rlc.funcs->start(adev);
1931 	}
1932 	return 0;
1933 }
1934 
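/*
 * Invalidate the ME L1 instruction cache and program its base address
 * registers to point at the supplied ME ucode location.
 */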
1935 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
1936 {
1937 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
1938 	uint32_t tmp;
1939 	int i;
1940 
1941 	/* Trigger an invalidation of the L1 instruction caches */
1942 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
1943 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1944 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
1945 
1946 	/* Wait for invalidation complete */
1947 	for (i = 0; i < usec_timeout; i++) {
1948 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
1949 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
1950 					INVALIDATE_CACHE_COMPLETE))
1951 			break;
1952 		udelay(1);
1953 	}
1954 
1955 	if (i >= usec_timeout) {
1956 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
1957 		return -EINVAL;
1958 	}
1959 
1960 	if (amdgpu_emu_mode == 1)
1961 		adev->hdp.funcs->flush_hdp(adev, NULL);
1962 
1963 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
1964 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
1965 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
1966 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
1967 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
1968 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
1969 
1970 	/* Program me ucode address into instruction cache address register */
1971 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
1972 			lower_32_bits(addr) & 0xFFFFF000);
1973 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
1974 			upper_32_bits(addr));
1975 
1976 	return 0;
1977 }
1978 
1979 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
1980 {
1981 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
1982 	uint32_t tmp;
1983 	int i;
1984 
1985 	/* Trigger an invalidation of the L1 instruction caches */
1986 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
1987 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1988 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
1989 
1990 	/* Wait for invalidation complete */
1991 	for (i = 0; i < usec_timeout; i++) {
1992 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
1993 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
1994 					INVALIDATE_CACHE_COMPLETE))
1995 			break;
1996 		udelay(1);
1997 	}
1998 
1999 	if (i >= usec_timeout) {
2000 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2001 		return -EINVAL;
2002 	}
2003 
2004 	if (amdgpu_emu_mode == 1)
2005 		adev->hdp.funcs->flush_hdp(adev, NULL);
2006 
2007 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2008 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2009 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2010 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2011 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2012 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2013 
2014 	/* Program pfp ucode address into instruction cache address register */
2015 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2016 			lower_32_bits(addr) & 0xFFFFF000);
2017 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2018 			upper_32_bits(addr));
2019 
2020 	return 0;
2021 }
2022 
2023 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2024 {
2025 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2026 	uint32_t tmp;
2027 	int i;
2028 
2029 	/* Trigger an invalidation of the L1 instruction caches */
2030 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2031 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2032 
2033 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2034 
2035 	/* Wait for invalidation complete */
2036 	for (i = 0; i < usec_timeout; i++) {
2037 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2038 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2039 					INVALIDATE_CACHE_COMPLETE))
2040 			break;
2041 		udelay(1);
2042 	}
2043 
2044 	if (i >= usec_timeout) {
2045 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2046 		return -EINVAL;
2047 	}
2048 
2049 	if (amdgpu_emu_mode == 1)
2050 		adev->hdp.funcs->flush_hdp(adev, NULL);
2051 
2052 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2053 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2054 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2055 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2056 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2057 
2058 	/* Program mec1 ucode address into instruction cache address register */
2059 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2060 			lower_32_bits(addr) & 0xFFFFF000);
2061 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2062 			upper_32_bits(addr));
2063 
2064 	return 0;
2065 }
2066 
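/*
 * RS64 variant: program the PFP instruction cache base, prime it, set
 * the per-pipe program counter start from the firmware header, and
 * point the RS64 data cache at the PFP stack/data buffer (addr2).
 */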
2067 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2068 {
2069 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2070 	uint32_t tmp;
2071 	unsigned i, pipe_id;
2072 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2073 
2074 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2075 		adev->gfx.pfp_fw->data;
2076 
2077 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2078 		lower_32_bits(addr));
2079 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2080 		upper_32_bits(addr));
2081 
2082 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2083 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2084 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2085 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2086 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2087 
2088 	/*
2089 	 * Programming any of the CP_PFP_IC_BASE registers
2090 	 * forces invalidation of the PFP L1 I$. Wait for the
2091 	 * invalidation to complete.
2092 	 */
2093 	for (i = 0; i < usec_timeout; i++) {
2094 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2095 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2096 			INVALIDATE_CACHE_COMPLETE))
2097 			break;
2098 		udelay(1);
2099 	}
2100 
2101 	if (i >= usec_timeout) {
2102 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2103 		return -EINVAL;
2104 	}
2105 
2106 	/* Prime the L1 instruction caches */
2107 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2108 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2109 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2110 	/* Wait for the cache to be primed */
2111 	for (i = 0; i < usec_timeout; i++) {
2112 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2113 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2114 			ICACHE_PRIMED))
2115 			break;
2116 		udelay(1);
2117 	}
2118 
2119 	if (i >= usec_timeout) {
2120 		dev_err(adev->dev, "failed to prime instruction cache\n");
2121 		return -EINVAL;
2122 	}
2123 
2124 	mutex_lock(&adev->srbm_mutex);
2125 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2126 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2127 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2128 			(pfp_hdr->ucode_start_addr_hi << 30) |
2129 			(pfp_hdr->ucode_start_addr_lo >> 2));
2130 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2131 			pfp_hdr->ucode_start_addr_hi >> 2);
2132 
2133 		/*
2134 		 * Program CP_ME_CNTL to reset the given pipe so that
2135 		 * CP_PFP_PRGRM_CNTR_START takes effect.
2136 		 */
2137 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2138 		if (pipe_id == 0)
2139 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2140 					PFP_PIPE0_RESET, 1);
2141 		else
2142 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2143 					PFP_PIPE1_RESET, 1);
2144 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2145 
2146 		/* Clear pfp pipe reset bit. */
2147 		if (pipe_id == 0)
2148 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2149 					PFP_PIPE0_RESET, 0);
2150 		else
2151 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2152 					PFP_PIPE1_RESET, 0);
2153 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2154 
2155 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2156 			lower_32_bits(addr2));
2157 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2158 			upper_32_bits(addr2));
2159 	}
2160 	soc21_grbm_select(adev, 0, 0, 0, 0);
2161 	mutex_unlock(&adev->srbm_mutex);
2162 
2163 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2164 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2165 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2166 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2167 
2168 	/* Invalidate the data caches */
2169 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2170 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2171 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2172 
2173 	for (i = 0; i < usec_timeout; i++) {
2174 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2175 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2176 			INVALIDATE_DCACHE_COMPLETE))
2177 			break;
2178 		udelay(1);
2179 	}
2180 
2181 	if (i >= usec_timeout) {
2182 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2183 		return -EINVAL;
2184 	}
2185 
2186 	return 0;
2187 }
2188 
2189 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2190 {
2191 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2192 	uint32_t tmp;
2193 	unsigned i, pipe_id;
2194 	const struct gfx_firmware_header_v2_0 *me_hdr;
2195 
2196 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2197 		adev->gfx.me_fw->data;
2198 
2199 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2200 		lower_32_bits(addr));
2201 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2202 		upper_32_bits(addr));
2203 
2204 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2205 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2206 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2207 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2208 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2209 
2210 	/*
2211 	 * Programming any of the CP_ME_IC_BASE registers
2212 	 * forces invalidation of the ME L1 I$. Wait for the
2213 	 * invalidation to complete.
2214 	 */
2215 	for (i = 0; i < usec_timeout; i++) {
2216 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2217 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2218 			INVALIDATE_CACHE_COMPLETE))
2219 			break;
2220 		udelay(1);
2221 	}
2222 
2223 	if (i >= usec_timeout) {
2224 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2225 		return -EINVAL;
2226 	}
2227 
2228 	/* Prime the instruction caches */
2229 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2230 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2231 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2232 
2233 	/* Wait for the instruction cache to be primed */
2234 	for (i = 0; i < usec_timeout; i++) {
2235 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2236 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2237 			ICACHE_PRIMED))
2238 			break;
2239 		udelay(1);
2240 	}
2241 
2242 	if (i >= usec_timeout) {
2243 		dev_err(adev->dev, "failed to prime instruction cache\n");
2244 		return -EINVAL;
2245 	}
2246 
2247 	mutex_lock(&adev->srbm_mutex);
2248 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2249 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2250 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2251 			(me_hdr->ucode_start_addr_hi << 30) |
2252 			(me_hdr->ucode_start_addr_lo >> 2));
2253 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2254 			me_hdr->ucode_start_addr_hi >> 2);
2255 
2256 		/*
2257 		 * Program CP_ME_CNTL to reset the given pipe so that
2258 		 * CP_ME_PRGRM_CNTR_START takes effect.
2259 		 */
2260 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2261 		if (pipe_id == 0)
2262 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2263 					ME_PIPE0_RESET, 1);
2264 		else
2265 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2266 					ME_PIPE1_RESET, 1);
2267 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2268 
2269 		/* Clear me pipe reset bit. */
2270 		if (pipe_id == 0)
2271 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2272 					ME_PIPE0_RESET, 0);
2273 		else
2274 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2275 					ME_PIPE1_RESET, 0);
2276 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2277 
2278 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2279 			lower_32_bits(addr2));
2280 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2281 			upper_32_bits(addr2));
2282 	}
2283 	soc21_grbm_select(adev, 0, 0, 0, 0);
2284 	mutex_unlock(&adev->srbm_mutex);
2285 
2286 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2287 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2288 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2289 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2290 
2291 	/* Invalidate the data caches */
2292 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2293 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2294 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2295 
2296 	for (i = 0; i < usec_timeout; i++) {
2297 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2298 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2299 			INVALIDATE_DCACHE_COMPLETE))
2300 			break;
2301 		udelay(1);
2302 	}
2303 
2304 	if (i >= usec_timeout) {
2305 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2306 		return -EINVAL;
2307 	}
2308 
2309 	return 0;
2310 }
2311 
2312 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2313 {
2314 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2315 	uint32_t tmp;
2316 	unsigned i;
2317 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2318 
2319 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2320 		adev->gfx.mec_fw->data;
2321 
2322 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2323 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2324 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2325 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2326 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2327 
2328 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2329 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2330 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2331 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2332 
2333 	mutex_lock(&adev->srbm_mutex);
2334 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2335 		soc21_grbm_select(adev, 1, i, 0, 0);
2336 
2337 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
2338 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2339 		     upper_32_bits(addr2));
2340 
2341 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2342 					mec_hdr->ucode_start_addr_lo >> 2 |
2343 					mec_hdr->ucode_start_addr_hi << 30);
2344 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2345 					mec_hdr->ucode_start_addr_hi >> 2);
2346 
2347 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
2348 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2349 		     upper_32_bits(addr));
2350 	}
2351 	soc21_grbm_select(adev, 0, 0, 0, 0);
2352 	mutex_unlock(&adev->srbm_mutex);
2353 
2354 	/* Trigger an invalidation of the MEC RS64 data cache */
2355 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2356 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2357 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2358 
2359 	/* Wait for invalidation complete */
2360 	for (i = 0; i < usec_timeout; i++) {
2361 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2362 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2363 				       INVALIDATE_DCACHE_COMPLETE))
2364 			break;
2365 		udelay(1);
2366 	}
2367 
2368 	if (i >= usec_timeout) {
2369 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2370 		return -EINVAL;
2371 	}
2372 
2373 	/* Trigger an invalidation of the L1 instruction caches */
2374 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2375 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2376 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2377 
2378 	/* Wait for invalidation complete */
2379 	for (i = 0; i < usec_timeout; i++) {
2380 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2381 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2382 				       INVALIDATE_CACHE_COMPLETE))
2383 			break;
2384 		udelay(1);
2385 	}
2386 
2387 	if (i >= usec_timeout) {
2388 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2389 		return -EINVAL;
2390 	}
2391 
2392 	return 0;
2393 }
2394 
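/*
 * For RS64-capable CPs, program the PFP/ME/MEC program counter start
 * addresses from the firmware headers and pulse the per-pipe reset bits
 * so the new start addresses take effect.
 */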
2395 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2396 {
2397 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2398 	const struct gfx_firmware_header_v2_0 *me_hdr;
2399 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2400 	uint32_t pipe_id, tmp;
2401 
2402 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2403 		adev->gfx.mec_fw->data;
2404 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2405 		adev->gfx.me_fw->data;
2406 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2407 		adev->gfx.pfp_fw->data;
2408 
2409 	/* config pfp program start addr */
2410 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2411 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2412 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2413 			(pfp_hdr->ucode_start_addr_hi << 30) |
2414 			(pfp_hdr->ucode_start_addr_lo >> 2));
2415 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2416 			pfp_hdr->ucode_start_addr_hi >> 2);
2417 	}
2418 	soc21_grbm_select(adev, 0, 0, 0, 0);
2419 
2420 	/* reset pfp pipe */
2421 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2422 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2423 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2424 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2425 
2426 	/* clear pfp pipe reset */
2427 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2428 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2429 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2430 
2431 	/* config me program start addr */
2432 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2433 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2434 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2435 			(me_hdr->ucode_start_addr_hi << 30) |
2436 			(me_hdr->ucode_start_addr_lo >> 2));
2437 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2438 			me_hdr->ucode_start_addr_hi >> 2);
2439 	}
2440 	soc21_grbm_select(adev, 0, 0, 0, 0);
2441 
2442 	/* reset me pipe */
2443 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2444 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2445 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2446 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2447 
2448 	/* clear me pipe reset */
2449 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2450 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2451 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2452 
2453 	/* config mec program start addr */
2454 	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2455 		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2456 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2457 					mec_hdr->ucode_start_addr_lo >> 2 |
2458 					mec_hdr->ucode_start_addr_hi << 30);
2459 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2460 					mec_hdr->ucode_start_addr_hi >> 2);
2461 	}
2462 	soc21_grbm_select(adev, 0, 0, 0, 0);
2463 
2464 	/* reset mec pipe */
2465 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2466 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2467 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2468 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2469 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2470 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2471 
2472 	/* clear mec pipe reset */
2473 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2474 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2475 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2476 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2477 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2478 }
2479 
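/*
 * Poll CP_STAT and the RLC bootload status until the RLC reports that
 * GC firmware autoload has finished, then, for the RLC backdoor
 * autoload path, configure the CP instruction (and, for RS64, data)
 * caches from the autoload buffer offsets.
 */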
2480 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2481 {
2482 	uint32_t cp_status;
2483 	uint32_t bootload_status;
2484 	int i, r;
2485 	uint64_t addr, addr2;
2486 
2487 	for (i = 0; i < adev->usec_timeout; i++) {
2488 		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2489 
2490 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1))
2491 			bootload_status = RREG32_SOC15(GC, 0,
2492 					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2493 		else
2494 			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2495 
2496 		if ((cp_status == 0) &&
2497 		    (REG_GET_FIELD(bootload_status,
2498 			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2499 			break;
2500 		}
2501 		udelay(1);
2502 	}
2503 
2504 	if (i >= adev->usec_timeout) {
2505 		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2506 		return -ETIMEDOUT;
2507 	}
2508 
2509 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2510 		if (adev->gfx.rs64_enable) {
2511 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2512 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2513 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2514 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2515 			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2516 			if (r)
2517 				return r;
2518 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2519 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2520 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2521 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2522 			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2523 			if (r)
2524 				return r;
2525 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2526 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2527 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2528 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2529 			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2530 			if (r)
2531 				return r;
2532 		} else {
2533 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2534 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2535 			r = gfx_v11_0_config_me_cache(adev, addr);
2536 			if (r)
2537 				return r;
2538 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2539 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2540 			r = gfx_v11_0_config_pfp_cache(adev, addr);
2541 			if (r)
2542 				return r;
2543 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2544 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2545 			r = gfx_v11_0_config_mec_cache(adev, addr);
2546 			if (r)
2547 				return r;
2548 		}
2549 	}
2550 
2551 	return 0;
2552 }
2553 
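/* Halt or un-halt the gfx CP (ME/PFP) and wait for CP_STAT to read back idle. */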
2554 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2555 {
2556 	int i;
2557 	u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2558 
2559 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2560 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2561 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2562 
2563 	for (i = 0; i < adev->usec_timeout; i++) {
2564 		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2565 			break;
2566 		udelay(1);
2567 	}
2568 
2569 	if (i >= adev->usec_timeout)
2570 		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2571 
2572 	return 0;
2573 }
2574 
2575 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2576 {
2577 	int r;
2578 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2579 	const __le32 *fw_data;
2580 	unsigned i, fw_size;
2581 
2582 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2583 		adev->gfx.pfp_fw->data;
2584 
2585 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2586 
2587 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2588 		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2589 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2590 
2591 	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2592 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2593 				      &adev->gfx.pfp.pfp_fw_obj,
2594 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
2595 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
2596 	if (r) {
2597 		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2598 		gfx_v11_0_pfp_fini(adev);
2599 		return r;
2600 	}
2601 
2602 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2603 
2604 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2605 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2606 
2607 	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2608 
2609 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
2610 
2611 	for (i = 0; i < pfp_hdr->jt_size; i++)
2612 		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
2613 			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
2614 
2615 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2616 
2617 	return 0;
2618 }
2619 
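/*
 * Front-door load of the RS64 PFP firmware: copy the ucode and data
 * images into VRAM BOs, prime the PFP instruction cache from them, and
 * program the per-pipe start addresses and data cache bases.
 */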
2620 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2621 {
2622 	int r;
2623 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2624 	const __le32 *fw_ucode, *fw_data;
2625 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2626 	uint32_t tmp;
2627 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2628 
2629 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2630 		adev->gfx.pfp_fw->data;
2631 
2632 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2633 
2634 	/* instruction */
2635 	fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2636 		le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2637 	fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2638 	/* data */
2639 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2640 		le32_to_cpu(pfp_hdr->data_offset_bytes));
2641 	fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2642 
2643 	/* 64kb align */
2644 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2645 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2646 				      &adev->gfx.pfp.pfp_fw_obj,
2647 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
2648 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
2649 	if (r) {
2650 		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2651 		gfx_v11_0_pfp_fini(adev);
2652 		return r;
2653 	}
2654 
2655 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
2656 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2657 				      &adev->gfx.pfp.pfp_fw_data_obj,
2658 				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2659 				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2660 	if (r) {
2661 		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2662 		gfx_v11_0_pfp_fini(adev);
2663 		return r;
2664 	}
2665 
2666 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2667 	memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2668 
2669 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2670 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2671 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2672 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2673 
2674 	if (amdgpu_emu_mode == 1)
2675 		adev->hdp.funcs->flush_hdp(adev, NULL);
2676 
2677 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2678 		lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2679 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2680 		upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2681 
2682 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2683 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2684 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2685 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2686 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2687 
2688 	/*
2689 	 * Programming any of the CP_PFP_IC_BASE registers
2690 	 * forces invalidation of the PFP L1 I$. Wait for the
2691 	 * invalidation to complete.
2692 	 */
2693 	for (i = 0; i < usec_timeout; i++) {
2694 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2695 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2696 			INVALIDATE_CACHE_COMPLETE))
2697 			break;
2698 		udelay(1);
2699 	}
2700 
2701 	if (i >= usec_timeout) {
2702 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2703 		return -EINVAL;
2704 	}
2705 
2706 	/* Prime the L1 instruction caches */
2707 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2708 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2709 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2710 	/* Wait for the cache to be primed */
2711 	for (i = 0; i < usec_timeout; i++) {
2712 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2713 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2714 			ICACHE_PRIMED))
2715 			break;
2716 		udelay(1);
2717 	}
2718 
2719 	if (i >= usec_timeout) {
2720 		dev_err(adev->dev, "failed to prime instruction cache\n");
2721 		return -EINVAL;
2722 	}
2723 
2724 	mutex_lock(&adev->srbm_mutex);
2725 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2726 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2727 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2728 			(pfp_hdr->ucode_start_addr_hi << 30) |
2729 			(pfp_hdr->ucode_start_addr_lo >> 2));
2730 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2731 			pfp_hdr->ucode_start_addr_hi >> 2);
2732 
2733 		/*
2734 		 * Program CP_ME_CNTL to reset the given pipe so that
2735 		 * CP_PFP_PRGRM_CNTR_START takes effect.
2736 		 */
2737 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2738 		if (pipe_id == 0)
2739 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2740 					PFP_PIPE0_RESET, 1);
2741 		else
2742 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2743 					PFP_PIPE1_RESET, 1);
2744 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2745 
2746 		/* Clear pfp pipe reset bit. */
2747 		if (pipe_id == 0)
2748 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2749 					PFP_PIPE0_RESET, 0);
2750 		else
2751 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2752 					PFP_PIPE1_RESET, 0);
2753 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2754 
2755 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2756 			lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2757 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2758 			upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2759 	}
2760 	soc21_grbm_select(adev, 0, 0, 0, 0);
2761 	mutex_unlock(&adev->srbm_mutex);
2762 
2763 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2764 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2765 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2766 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2767 
2768 	/* Invalidate the data caches */
2769 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2770 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2771 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2772 
2773 	for (i = 0; i < usec_timeout; i++) {
2774 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2775 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2776 			INVALIDATE_DCACHE_COMPLETE))
2777 			break;
2778 		udelay(1);
2779 	}
2780 
2781 	if (i >= usec_timeout) {
2782 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2783 		return -EINVAL;
2784 	}
2785 
2786 	return 0;
2787 }
2788 
2789 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
2790 {
2791 	int r;
2792 	const struct gfx_firmware_header_v1_0 *me_hdr;
2793 	const __le32 *fw_data;
2794 	unsigned i, fw_size;
2795 
2796 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2797 		adev->gfx.me_fw->data;
2798 
2799 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2800 
2801 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2802 		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2803 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
2804 
2805 	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
2806 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2807 				      &adev->gfx.me.me_fw_obj,
2808 				      &adev->gfx.me.me_fw_gpu_addr,
2809 				      (void **)&adev->gfx.me.me_fw_ptr);
2810 	if (r) {
2811 		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2812 		gfx_v11_0_me_fini(adev);
2813 		return r;
2814 	}
2815 
2816 	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
2817 
2818 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2819 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2820 
2821 	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
2822 
2823 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
2824 
2825 	for (i = 0; i < me_hdr->jt_size; i++)
2826 		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
2827 			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));
2828 
2829 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
2830 
2831 	return 0;
2832 }
2833 
2834 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2835 {
2836 	int r;
2837 	const struct gfx_firmware_header_v2_0 *me_hdr;
2838 	const __le32 *fw_ucode, *fw_data;
2839 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2840 	uint32_t tmp;
2841 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2842 
2843 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2844 		adev->gfx.me_fw->data;
2845 
2846 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2847 
2848 	/* instruction */
2849 	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2850 		le32_to_cpu(me_hdr->ucode_offset_bytes));
2851 	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2852 	/* data */
2853 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2854 		le32_to_cpu(me_hdr->data_offset_bytes));
2855 	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2856 
2857 	/* 64kb align*/
2858 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2859 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2860 				      &adev->gfx.me.me_fw_obj,
2861 				      &adev->gfx.me.me_fw_gpu_addr,
2862 				      (void **)&adev->gfx.me.me_fw_ptr);
2863 	if (r) {
2864 		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2865 		gfx_v11_0_me_fini(adev);
2866 		return r;
2867 	}
2868 
2869 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
2870 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2871 				      &adev->gfx.me.me_fw_data_obj,
2872 				      &adev->gfx.me.me_fw_data_gpu_addr,
2873 				      (void **)&adev->gfx.me.me_fw_data_ptr);
2874 	if (r) {
2875 		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
2876 		gfx_v11_0_me_fini(adev);
2877 		return r;
2878 	}
2879 
2880 	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
2881 	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
2882 
2883 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2884 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
2885 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2886 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
2887 
2888 	if (amdgpu_emu_mode == 1)
2889 		adev->hdp.funcs->flush_hdp(adev, NULL);
2890 
2891 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2892 		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
2893 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2894 		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
2895 
2896 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2897 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2898 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2899 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2900 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2901 
2902 	/*
2903 	 * Programming any of the CP_ME_IC_BASE registers
2904 	 * forces invalidation of the ME L1 I$. Wait for the
2905 	 * invalidation to complete.
2906 	 */
2907 	for (i = 0; i < usec_timeout; i++) {
2908 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2909 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2910 			INVALIDATE_CACHE_COMPLETE))
2911 			break;
2912 		udelay(1);
2913 	}
2914 
2915 	if (i >= usec_timeout) {
2916 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2917 		return -EINVAL;
2918 	}
2919 
2920 	/* Prime the instruction caches */
2921 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2922 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2923 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2924 
2925 	/* Wait for the instruction cache to be primed */
2926 	for (i = 0; i < usec_timeout; i++) {
2927 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2928 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2929 			ICACHE_PRIMED))
2930 			break;
2931 		udelay(1);
2932 	}
2933 
2934 	if (i >= usec_timeout) {
2935 		dev_err(adev->dev, "failed to prime instruction cache\n");
2936 		return -EINVAL;
2937 	}
2938 
2939 	mutex_lock(&adev->srbm_mutex);
2940 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2941 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2942 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2943 			(me_hdr->ucode_start_addr_hi << 30) |
2944 			(me_hdr->ucode_start_addr_lo >> 2));
2945 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2946 			me_hdr->ucode_start_addr_hi >> 2);
2947 
2948 		/*
2949 		 * Program CP_ME_CNTL to reset the given pipe so that
2950 		 * CP_ME_PRGRM_CNTR_START takes effect.
2951 		 */
2952 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2953 		if (pipe_id == 0)
2954 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2955 					ME_PIPE0_RESET, 1);
2956 		else
2957 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2958 					ME_PIPE1_RESET, 1);
2959 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2960 
2961 		/* Clear me pipe reset bit. */
2962 		if (pipe_id == 0)
2963 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2964 					ME_PIPE0_RESET, 0);
2965 		else
2966 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2967 					ME_PIPE1_RESET, 0);
2968 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2969 
2970 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2971 			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2972 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2973 			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2974 	}
2975 	soc21_grbm_select(adev, 0, 0, 0, 0);
2976 	mutex_unlock(&adev->srbm_mutex);
2977 
2978 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2979 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2980 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2981 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2982 
2983 	/* Invalidate the data caches */
2984 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2985 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2986 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2987 
2988 	for (i = 0; i < usec_timeout; i++) {
2989 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2990 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2991 			INVALIDATE_DCACHE_COMPLETE))
2992 			break;
2993 		udelay(1);
2994 	}
2995 
2996 	if (i >= usec_timeout) {
2997 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2998 		return -EINVAL;
2999 	}
3000 
3001 	return 0;
3002 }
3003 
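/*
 * Front-door load of the gfx CP firmware: halt the CP, then load PFP
 * and ME using either the RS64 or the legacy path.
 */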
3004 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3005 {
3006 	int r;
3007 
3008 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3009 		return -EINVAL;
3010 
3011 	gfx_v11_0_cp_gfx_enable(adev, false);
3012 
3013 	if (adev->gfx.rs64_enable)
3014 		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3015 	else
3016 		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3017 	if (r) {
3018 		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3019 		return r;
3020 	}
3021 
3022 	if (adev->gfx.rs64_enable)
3023 		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3024 	else
3025 		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3026 	if (r) {
3027 		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3028 		return r;
3029 	}
3030 
3031 	return 0;
3032 }
3033 
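/*
 * Initialize the CP and emit the clear-state preamble on gfx ring 0
 * (and a CLEAR_STATE packet on ring 1 when a second gfx ring exists).
 */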
3034 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3035 {
3036 	struct amdgpu_ring *ring;
3037 	const struct cs_section_def *sect = NULL;
3038 	const struct cs_extent_def *ext = NULL;
3039 	int r, i;
3040 	int ctx_reg_offset;
3041 
3042 	/* init the CP */
3043 	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3044 		     adev->gfx.config.max_hw_contexts - 1);
3045 	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3046 
3047 	if (!amdgpu_async_gfx_ring)
3048 		gfx_v11_0_cp_gfx_enable(adev, true);
3049 
3050 	ring = &adev->gfx.gfx_ring[0];
3051 	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3052 	if (r) {
3053 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3054 		return r;
3055 	}
3056 
3057 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3058 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3059 
3060 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3061 	amdgpu_ring_write(ring, 0x80000000);
3062 	amdgpu_ring_write(ring, 0x80000000);
3063 
3064 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3065 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3066 			if (sect->id == SECT_CONTEXT) {
3067 				amdgpu_ring_write(ring,
3068 						  PACKET3(PACKET3_SET_CONTEXT_REG,
3069 							  ext->reg_count));
3070 				amdgpu_ring_write(ring, ext->reg_index -
3071 						  PACKET3_SET_CONTEXT_REG_START);
3072 				for (i = 0; i < ext->reg_count; i++)
3073 					amdgpu_ring_write(ring, ext->extent[i]);
3074 			}
3075 		}
3076 	}
3077 
3078 	ctx_reg_offset =
3079 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3080 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3081 	amdgpu_ring_write(ring, ctx_reg_offset);
3082 	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3083 
3084 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3085 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3086 
3087 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3088 	amdgpu_ring_write(ring, 0);
3089 
3090 	amdgpu_ring_commit(ring);
3091 
3092 	/* submit cs packet to copy state 0 to next available state */
3093 	if (adev->gfx.num_gfx_rings > 1) {
3094 		/* maximum supported gfx ring is 2 */
3095 		ring = &adev->gfx.gfx_ring[1];
3096 		r = amdgpu_ring_alloc(ring, 2);
3097 		if (r) {
3098 			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3099 			return r;
3100 		}
3101 
3102 		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3103 		amdgpu_ring_write(ring, 0);
3104 
3105 		amdgpu_ring_commit(ring);
3106 	}
3107 	return 0;
3108 }
3109 
3110 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3111 					 CP_PIPE_ID pipe)
3112 {
3113 	u32 tmp;
3114 
3115 	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3116 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3117 
3118 	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3119 }
3120 
3121 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3122 					  struct amdgpu_ring *ring)
3123 {
3124 	u32 tmp;
3125 
3126 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3127 	if (ring->use_doorbell) {
3128 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3129 				    DOORBELL_OFFSET, ring->doorbell_index);
3130 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3131 				    DOORBELL_EN, 1);
3132 	} else {
3133 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3134 				    DOORBELL_EN, 0);
3135 	}
3136 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3137 
3138 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3139 			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
3140 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3141 
3142 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3143 		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3144 }
3145 
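/*
 * Program the gfx ring buffer registers (size, rptr/wptr addresses,
 * base and doorbell) for each gfx pipe and kick off the initial state
 * via gfx_v11_0_cp_gfx_start().
 */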
3146 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3147 {
3148 	struct amdgpu_ring *ring;
3149 	u32 tmp;
3150 	u32 rb_bufsz;
3151 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3152 	u32 i;
3153 
3154 	/* Set the write pointer delay */
3155 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3156 
3157 	/* set the RB to use vmid 0 */
3158 	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3159 
3160 	/* Init gfx ring 0 for pipe 0 */
3161 	mutex_lock(&adev->srbm_mutex);
3162 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3163 
3164 	/* Set ring buffer size */
3165 	ring = &adev->gfx.gfx_ring[0];
3166 	rb_bufsz = order_base_2(ring->ring_size / 8);
3167 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3168 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3169 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3170 
3171 	/* Initialize the ring buffer's write pointers */
3172 	ring->wptr = 0;
3173 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3174 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3175 
3176 	/* set the wb address whether it's enabled or not */
3177 	rptr_addr = ring->rptr_gpu_addr;
3178 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3179 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3180 		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3181 
3182 	wptr_gpu_addr = ring->wptr_gpu_addr;
3183 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3184 		     lower_32_bits(wptr_gpu_addr));
3185 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3186 		     upper_32_bits(wptr_gpu_addr));
3187 
3188 	mdelay(1);
3189 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3190 
3191 	rb_addr = ring->gpu_addr >> 8;
3192 	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3193 	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3194 
3195 	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3196 
3197 	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3198 	mutex_unlock(&adev->srbm_mutex);
3199 
3200 	/* Init gfx ring 1 for pipe 1 */
3201 	if (adev->gfx.num_gfx_rings > 1) {
3202 		mutex_lock(&adev->srbm_mutex);
3203 		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3204 		/* maximum supported gfx ring is 2 */
3205 		ring = &adev->gfx.gfx_ring[1];
3206 		rb_bufsz = order_base_2(ring->ring_size / 8);
3207 		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3208 		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3209 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3210 		/* Initialize the ring buffer's write pointers */
3211 		ring->wptr = 0;
3212 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3213 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3214 		/* Set the wb address whether it's enabled or not */
3215 		rptr_addr = ring->rptr_gpu_addr;
3216 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3217 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3218 			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3219 		wptr_gpu_addr = ring->wptr_gpu_addr;
3220 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3221 			     lower_32_bits(wptr_gpu_addr));
3222 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3223 			     upper_32_bits(wptr_gpu_addr));
3224 
3225 		mdelay(1);
3226 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3227 
3228 		rb_addr = ring->gpu_addr >> 8;
3229 		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3230 		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3231 		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3232 
3233 		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3234 		mutex_unlock(&adev->srbm_mutex);
3235 	}
3236 	/* Switch to pipe 0 */
3237 	mutex_lock(&adev->srbm_mutex);
3238 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3239 	mutex_unlock(&adev->srbm_mutex);
3240 
3241 	/* start the ring */
3242 	gfx_v11_0_cp_gfx_start(adev);
3243 
3244 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3245 		ring = &adev->gfx.gfx_ring[i];
3246 		ring->sched.ready = true;
3247 	}
3248 
3249 	return 0;
3250 }
3251 
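/* Halt or release the compute microengine (MEC) pipes; RS64 firmware uses
 * CP_MEC_RS64_CNTL, legacy F32 firmware uses CP_MEC_CNTL.
 */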
3252 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3253 {
3254 	u32 data;
3255 
3256 	if (adev->gfx.rs64_enable) {
3257 		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3258 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3259 							 enable ? 0 : 1);
3260 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3261 							 enable ? 0 : 1);
3262 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3263 							 enable ? 0 : 1);
3264 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3265 							 enable ? 0 : 1);
3266 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3267 							 enable ? 0 : 1);
3268 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3269 							 enable ? 1 : 0);
3270 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3271 							 enable ? 1 : 0);
3272 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3273 							 enable ? 1 : 0);
3274 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3275 							 enable ? 1 : 0);
3276 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3277 							 enable ? 0 : 1);
3278 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3279 	} else {
3280 		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3281 
3282 		if (enable) {
3283 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3284 			if (!adev->enable_mes_kiq)
3285 				data = REG_SET_FIELD(data, CP_MEC_CNTL,
3286 						     MEC_ME2_HALT, 0);
3287 		} else {
3288 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3289 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3290 		}
3291 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3292 	}
3293 
3294 	adev->gfx.kiq.ring.sched.ready = enable;
3295 
3296 	udelay(50);
3297 }
3298 
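/* Legacy (non-RS64) MEC firmware load: copy the ucode image into a GTT
 * buffer, configure the MEC cache to fetch from it and program the jump
 * table through CP_MEC_ME1_UCODE_ADDR/DATA.
 */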
3299 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3300 {
3301 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3302 	const __le32 *fw_data;
3303 	unsigned i, fw_size;
3304 	u32 *fw = NULL;
3305 	int r;
3306 
3307 	if (!adev->gfx.mec_fw)
3308 		return -EINVAL;
3309 
3310 	gfx_v11_0_cp_compute_enable(adev, false);
3311 
3312 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3313 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3314 
3315 	fw_data = (const __le32 *)
3316 		(adev->gfx.mec_fw->data +
3317 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3318 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3319 
3320 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3321 					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3322 					  &adev->gfx.mec.mec_fw_obj,
3323 					  &adev->gfx.mec.mec_fw_gpu_addr,
3324 					  (void **)&fw);
3325 	if (r) {
3326 		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3327 		gfx_v11_0_mec_fini(adev);
3328 		return r;
3329 	}
3330 
3331 	memcpy(fw, fw_data, fw_size);
3332 
3333 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3334 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3335 
3336 	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3337 
3338 	/* MEC1 */
3339 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3340 
3341 	for (i = 0; i < mec_hdr->jt_size; i++)
3342 		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3343 			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3344 
3345 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3346 
3347 	return 0;
3348 }
3349 
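/* RS64 MEC firmware load: copy the instruction and data images into VRAM,
 * program the per-pipe base and start addresses, then invalidate the MEC
 * data and instruction caches and wait for completion.
 */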
3350 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3351 {
3352 	const struct gfx_firmware_header_v2_0 *mec_hdr;
3353 	const __le32 *fw_ucode, *fw_data;
3354 	u32 tmp, fw_ucode_size, fw_data_size;
3355 	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3356 	u32 *fw_ucode_ptr, *fw_data_ptr;
3357 	int r;
3358 
3359 	if (!adev->gfx.mec_fw)
3360 		return -EINVAL;
3361 
3362 	gfx_v11_0_cp_compute_enable(adev, false);
3363 
3364 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3365 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3366 
3367 	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3368 				le32_to_cpu(mec_hdr->ucode_offset_bytes));
3369 	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3370 
3371 	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3372 				le32_to_cpu(mec_hdr->data_offset_bytes));
3373 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3374 
3375 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3376 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3377 				      &adev->gfx.mec.mec_fw_obj,
3378 				      &adev->gfx.mec.mec_fw_gpu_addr,
3379 				      (void **)&fw_ucode_ptr);
3380 	if (r) {
3381 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3382 		gfx_v11_0_mec_fini(adev);
3383 		return r;
3384 	}
3385 
3386 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3387 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3388 				      &adev->gfx.mec.mec_fw_data_obj,
3389 				      &adev->gfx.mec.mec_fw_data_gpu_addr,
3390 				      (void **)&fw_data_ptr);
3391 	if (r) {
3392 		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3393 		gfx_v11_0_mec_fini(adev);
3394 		return r;
3395 	}
3396 
3397 	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3398 	memcpy(fw_data_ptr, fw_data, fw_data_size);
3399 
3400 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3401 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3402 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3403 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3404 
3405 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3406 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3407 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3408 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3409 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3410 
3411 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3412 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3413 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3414 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3415 
3416 	mutex_lock(&adev->srbm_mutex);
3417 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3418 		soc21_grbm_select(adev, 1, i, 0, 0);
3419 
3420 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3421 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3422 		     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3423 
3424 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3425 					mec_hdr->ucode_start_addr_lo >> 2 |
3426 					mec_hdr->ucode_start_addr_hi << 30);
3427 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3428 					mec_hdr->ucode_start_addr_hi >> 2);
3429 
3430 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3431 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3432 		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3433 	}
3434 	mutex_unlock(&adev->srbm_mutex);
3435 	soc21_grbm_select(adev, 0, 0, 0, 0);
3436 
3437 	/* Trigger an invalidation of the MEC data cache */
3438 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3439 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3440 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3441 
3442 	/* Wait for invalidation complete */
3443 	for (i = 0; i < usec_timeout; i++) {
3444 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3445 		if (REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3446 				  INVALIDATE_DCACHE_COMPLETE) == 1)
3447 			break;
3448 		udelay(1);
3449 	}
3450 
3451 	if (i >= usec_timeout) {
3452 		dev_err(adev->dev, "failed to invalidate MEC data cache\n");
3453 		return -EINVAL;
3454 	}
3455 
3456 	/* Trigger an invalidation of the L1 instruction caches */
3457 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3458 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3459 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3460 
3461 	/* Wait for invalidation complete */
3462 	for (i = 0; i < usec_timeout; i++) {
3463 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3464 		if (REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3465 				  INVALIDATE_CACHE_COMPLETE) == 1)
3466 			break;
3467 		udelay(1);
3468 	}
3469 
3470 	if (i >= usec_timeout) {
3471 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3472 		return -EINVAL;
3473 	}
3474 
3475 	return 0;
3476 }
3477 
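/* Register the KIQ with the RLC by encoding its me/pipe/queue into
 * RLC_CP_SCHEDULERS.
 */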
3478 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3479 {
3480 	uint32_t tmp;
3481 	struct amdgpu_device *adev = ring->adev;
3482 
3483 	/* tell RLC which is KIQ queue */
3484 	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3485 	tmp &= 0xffffff00;
3486 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3487 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3488 	tmp |= 0x80;
3489 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3490 }
3491 
3492 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3493 {
3494 	/* set graphics engine doorbell range */
3495 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3496 		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
3497 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3498 		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3499 
3500 	/* set compute engine doorbell range */
3501 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3502 		     (adev->doorbell_index.kiq * 2) << 2);
3503 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3504 		     (adev->doorbell_index.userqueue_end * 2) << 2);
3505 }
3506 
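/* Fill a gfx MQD (memory queue descriptor) from the queue properties so the
 * CP can load the queue state from memory.
 */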
3507 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3508 				  struct amdgpu_mqd_prop *prop)
3509 {
3510 	struct v11_gfx_mqd *mqd = m;
3511 	uint64_t hqd_gpu_addr, wb_gpu_addr;
3512 	uint32_t tmp;
3513 	uint32_t rb_bufsz;
3514 
3515 	/* set up gfx hqd wptr */
3516 	mqd->cp_gfx_hqd_wptr = 0;
3517 	mqd->cp_gfx_hqd_wptr_hi = 0;
3518 
3519 	/* set the pointer to the MQD */
3520 	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3521 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3522 
3523 	/* set up mqd control */
3524 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3525 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3526 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3527 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3528 	mqd->cp_gfx_mqd_control = tmp;
3529 
3530 	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3531 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3532 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3533 	mqd->cp_gfx_hqd_vmid = 0;
3534 
3535 	/* set up default queue priority level
3536 	 * 0x0 = low priority, 0x1 = high priority */
3537 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3538 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
3539 	mqd->cp_gfx_hqd_queue_priority = tmp;
3540 
3541 	/* set up time quantum */
3542 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3543 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
3544 	mqd->cp_gfx_hqd_quantum = tmp;
3545 
3546 	/* set up gfx hqd base, similar to CP_RB_BASE */
3547 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3548 	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
3549 	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
3550 
3551 	/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
3552 	wb_gpu_addr = prop->rptr_gpu_addr;
3553 	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
3554 	mqd->cp_gfx_hqd_rptr_addr_hi =
3555 		upper_32_bits(wb_gpu_addr) & 0xffff;
3556 
3557 	/* set up rb_wptr_poll addr */
3558 	wb_gpu_addr = prop->wptr_gpu_addr;
3559 	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3560 	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3561 
3562 	/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
3563 	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
3564 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
3565 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
3566 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
3567 #ifdef __BIG_ENDIAN
3568 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3569 #endif
3570 	mqd->cp_gfx_hqd_cntl = tmp;
3571 
3572 	/* set up cp_doorbell_control */
3573 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3574 	if (prop->use_doorbell) {
3575 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3576 				    DOORBELL_OFFSET, prop->doorbell_index);
3577 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3578 				    DOORBELL_EN, 1);
3579 	} else
3580 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3581 				    DOORBELL_EN, 0);
3582 	mqd->cp_rb_doorbell_control = tmp;
3583 
3584 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3585 	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
3586 
3587 	/* activate the queue */
3588 	mqd->cp_gfx_hqd_active = 1;
3589 
3590 	return 0;
3591 }
3592 
3593 #ifdef BRING_UP_DEBUG
3594 static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
3595 {
3596 	struct amdgpu_device *adev = ring->adev;
3597 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3598 
3599 	/* set mmCP_GFX_HQD_WPTR/_HI to 0 */
3600 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
3601 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
3602 
3603 	/* set GFX_MQD_BASE */
3604 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
3605 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3606 
3607 	/* set GFX_MQD_CONTROL */
3608 	WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
3609 
3610 	/* set GFX_HQD_VMID to 0 */
3611 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
3612 
3613 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
3614 			mqd->cp_gfx_hqd_queue_priority);
3615 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
3616 
3617 	/* set GFX_HQD_BASE, similar as CP_RB_BASE */
3618 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
3619 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
3620 
3621 	/* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
3622 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
3623 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
3624 
3625 	/* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
3626 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
3627 
3628 	/* set RB_WPTR_POLL_ADDR */
3629 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
3630 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
3631 
3632 	/* set RB_DOORBELL_CONTROL */
3633 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
3634 
3635 	/* activate the queue */
3636 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
3637 
3638 	return 0;
3639 }
3640 #endif
3641 
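/* Initialize a gfx queue: build the MQD from scratch on first init (keeping
 * a backup copy), restore it from the backup on GPU reset, or simply clear
 * the ring when resuming from suspend.
 */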
3642 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3643 {
3644 	struct amdgpu_device *adev = ring->adev;
3645 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3646 	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
3647 
3648 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3649 		memset((void *)mqd, 0, sizeof(*mqd));
3650 		mutex_lock(&adev->srbm_mutex);
3651 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3652 		amdgpu_ring_init_mqd(ring);
3653 #ifdef BRING_UP_DEBUG
3654 		gfx_v11_0_gfx_queue_init_register(ring);
3655 #endif
3656 		soc21_grbm_select(adev, 0, 0, 0, 0);
3657 		mutex_unlock(&adev->srbm_mutex);
3658 		if (adev->gfx.me.mqd_backup[mqd_idx])
3659 			memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3660 	} else if (amdgpu_in_reset(adev)) {
3661 		/* reset mqd with the backup copy */
3662 		if (adev->gfx.me.mqd_backup[mqd_idx])
3663 			memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
3664 		/* reset the ring */
3665 		ring->wptr = 0;
3666 		*ring->wptr_cpu_addr = 0;
3667 		amdgpu_ring_clear_ring(ring);
3668 #ifdef BRING_UP_DEBUG
3669 		mutex_lock(&adev->srbm_mutex);
3670 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3671 		gfx_v11_0_gfx_queue_init_register(ring);
3672 		soc21_grbm_select(adev, 0, 0, 0, 0);
3673 		mutex_unlock(&adev->srbm_mutex);
3674 #endif
3675 	} else {
3676 		amdgpu_ring_clear_ring(ring);
3677 	}
3678 
3679 	return 0;
3680 }
3681 
3682 #ifndef BRING_UP_DEBUG
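/* Map the kernel gfx queues onto the hardware by submitting MAP_QUEUES
 * packets through the KIQ.
 */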
3683 static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev)
3684 {
3685 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3686 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3687 	int r, i;
3688 
3689 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
3690 		return -EINVAL;
3691 
3692 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
3693 					adev->gfx.num_gfx_rings);
3694 	if (r) {
3695 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3696 		return r;
3697 	}
3698 
3699 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3700 		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
3701 
3702 	return amdgpu_ring_test_helper(kiq_ring);
3703 }
3704 #endif
3705 
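/* Resume the gfx rings through the KIQ path: initialize each ring's MQD,
 * map the queues via the KIQ and submit the clear state buffer.
 */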
3706 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3707 {
3708 	int r, i;
3709 	struct amdgpu_ring *ring;
3710 
3711 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3712 		ring = &adev->gfx.gfx_ring[i];
3713 
3714 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3715 		if (unlikely(r != 0))
3716 			goto done;
3717 
3718 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3719 		if (!r) {
3720 			r = gfx_v11_0_gfx_init_queue(ring);
3721 			amdgpu_bo_kunmap(ring->mqd_obj);
3722 			ring->mqd_ptr = NULL;
3723 		}
3724 		amdgpu_bo_unreserve(ring->mqd_obj);
3725 		if (r)
3726 			goto done;
3727 	}
3728 #ifndef BRING_UP_DEBUG
3729 	r = gfx_v11_0_kiq_enable_kgq(adev);
3730 	if (r)
3731 		goto done;
3732 #endif
3733 	r = gfx_v11_0_cp_gfx_start(adev);
3734 	if (r)
3735 		goto done;
3736 
3737 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3738 		ring = &adev->gfx.gfx_ring[i];
3739 		ring->sched.ready = true;
3740 	}
3741 done:
3742 	return r;
3743 }
3744 
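/* Fill a compute MQD from the queue properties: EOP buffer, doorbell,
 * queue base/size and priority settings.
 */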
3745 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
3746 				      struct amdgpu_mqd_prop *prop)
3747 {
3748 	struct v11_compute_mqd *mqd = m;
3749 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3750 	uint32_t tmp;
3751 
3752 	mqd->header = 0xC0310800;
3753 	mqd->compute_pipelinestat_enable = 0x00000001;
3754 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3755 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3756 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3757 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3758 	mqd->compute_misc_reserved = 0x00000007;
3759 
3760 	eop_base_addr = prop->eop_gpu_addr >> 8;
3761 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3762 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3763 
3764 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3765 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3766 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3767 			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
3768 
3769 	mqd->cp_hqd_eop_control = tmp;
3770 
3771 	/* enable doorbell? */
3772 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3773 
3774 	if (prop->use_doorbell) {
3775 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3776 				    DOORBELL_OFFSET, prop->doorbell_index);
3777 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3778 				    DOORBELL_EN, 1);
3779 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3780 				    DOORBELL_SOURCE, 0);
3781 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3782 				    DOORBELL_HIT, 0);
3783 	} else {
3784 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3785 				    DOORBELL_EN, 0);
3786 	}
3787 
3788 	mqd->cp_hqd_pq_doorbell_control = tmp;
3789 
3790 	/* disable the queue if it's active */
3791 	mqd->cp_hqd_dequeue_request = 0;
3792 	mqd->cp_hqd_pq_rptr = 0;
3793 	mqd->cp_hqd_pq_wptr_lo = 0;
3794 	mqd->cp_hqd_pq_wptr_hi = 0;
3795 
3796 	/* set the pointer to the MQD */
3797 	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3798 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3799 
3800 	/* set MQD vmid to 0 */
3801 	tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3802 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3803 	mqd->cp_mqd_control = tmp;
3804 
3805 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3806 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3807 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3808 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3809 
3810 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3811 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3812 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3813 			    (order_base_2(prop->queue_size / 4) - 1));
3814 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3815 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3816 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3817 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3818 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3819 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3820 	mqd->cp_hqd_pq_control = tmp;
3821 
3822 	/* set the wb address whether it's enabled or not */
3823 	wb_gpu_addr = prop->rptr_gpu_addr;
3824 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3825 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3826 		upper_32_bits(wb_gpu_addr) & 0xffff;
3827 
3828 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3829 	wb_gpu_addr = prop->wptr_gpu_addr;
3830 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3831 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3832 
3833 	tmp = 0;
3834 	/* enable the doorbell if requested */
3835 	if (prop->use_doorbell) {
3836 		tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3837 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3838 				DOORBELL_OFFSET, prop->doorbell_index);
3839 
3840 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3841 				    DOORBELL_EN, 1);
3842 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3843 				    DOORBELL_SOURCE, 0);
3844 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3845 				    DOORBELL_HIT, 0);
3846 	}
3847 
3848 	mqd->cp_hqd_pq_doorbell_control = tmp;
3849 
3850 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3851 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3852 
3853 	/* set the vmid for the queue */
3854 	mqd->cp_hqd_vmid = 0;
3855 
3856 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3857 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
3858 	mqd->cp_hqd_persistent_state = tmp;
3859 
3860 	/* set MIN_IB_AVAIL_SIZE */
3861 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
3862 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3863 	mqd->cp_hqd_ib_control = tmp;
3864 
3865 	/* set static priority for a compute queue/ring */
3866 	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
3867 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
3868 
3869 	mqd->cp_hqd_active = prop->hqd_active;
3870 
3871 	return 0;
3872 }
3873 
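/* Program the KIQ's HQD registers directly from its MQD; the KIQ is brought
 * up by register writes rather than being mapped through another queue.
 */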
3874 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
3875 {
3876 	struct amdgpu_device *adev = ring->adev;
3877 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
3878 	int j;
3879 
3880 	/* inactivate the queue */
3881 	if (amdgpu_sriov_vf(adev))
3882 		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
3883 
3884 	/* disable wptr polling */
3885 	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3886 
3887 	/* write the EOP addr */
3888 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
3889 	       mqd->cp_hqd_eop_base_addr_lo);
3890 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
3891 	       mqd->cp_hqd_eop_base_addr_hi);
3892 
3893 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3894 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
3895 	       mqd->cp_hqd_eop_control);
3896 
3897 	/* enable doorbell? */
3898 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3899 	       mqd->cp_hqd_pq_doorbell_control);
3900 
3901 	/* disable the queue if it's active */
3902 	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
3903 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
3904 		for (j = 0; j < adev->usec_timeout; j++) {
3905 			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
3906 				break;
3907 			udelay(1);
3908 		}
3909 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
3910 		       mqd->cp_hqd_dequeue_request);
3911 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
3912 		       mqd->cp_hqd_pq_rptr);
3913 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3914 		       mqd->cp_hqd_pq_wptr_lo);
3915 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3916 		       mqd->cp_hqd_pq_wptr_hi);
3917 	}
3918 
3919 	/* set the pointer to the MQD */
3920 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
3921 	       mqd->cp_mqd_base_addr_lo);
3922 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
3923 	       mqd->cp_mqd_base_addr_hi);
3924 
3925 	/* set MQD vmid to 0 */
3926 	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
3927 	       mqd->cp_mqd_control);
3928 
3929 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3930 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
3931 	       mqd->cp_hqd_pq_base_lo);
3932 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
3933 	       mqd->cp_hqd_pq_base_hi);
3934 
3935 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3936 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
3937 	       mqd->cp_hqd_pq_control);
3938 
3939 	/* set the wb address whether it's enabled or not */
3940 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
3941 		mqd->cp_hqd_pq_rptr_report_addr_lo);
3942 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3943 		mqd->cp_hqd_pq_rptr_report_addr_hi);
3944 
3945 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3946 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
3947 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3948 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3949 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3950 
3951 	/* enable the doorbell if requested */
3952 	if (ring->use_doorbell) {
3953 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3954 			(adev->doorbell_index.kiq * 2) << 2);
3955 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3956 			(adev->doorbell_index.userqueue_end * 2) << 2);
3957 	}
3958 
3959 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3960 	       mqd->cp_hqd_pq_doorbell_control);
3961 
3962 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3963 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3964 	       mqd->cp_hqd_pq_wptr_lo);
3965 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3966 	       mqd->cp_hqd_pq_wptr_hi);
3967 
3968 	/* set the vmid for the queue */
3969 	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
3970 
3971 	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
3972 	       mqd->cp_hqd_persistent_state);
3973 
3974 	/* activate the queue */
3975 	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
3976 	       mqd->cp_hqd_active);
3977 
3978 	if (ring->use_doorbell)
3979 		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3980 
3981 	return 0;
3982 }
3983 
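/* Initialize the KIQ: restore its MQD from the backup copy on GPU reset,
 * otherwise build a fresh MQD, then program the HQD registers directly.
 */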
3984 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
3985 {
3986 	struct amdgpu_device *adev = ring->adev;
3987 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
3988 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3989 
3990 	gfx_v11_0_kiq_setting(ring);
3991 
3992 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3993 		/* reset MQD to a clean status */
3994 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3995 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
3996 
3997 		/* reset ring buffer */
3998 		ring->wptr = 0;
3999 		amdgpu_ring_clear_ring(ring);
4000 
4001 		mutex_lock(&adev->srbm_mutex);
4002 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4003 		gfx_v11_0_kiq_init_register(ring);
4004 		soc21_grbm_select(adev, 0, 0, 0, 0);
4005 		mutex_unlock(&adev->srbm_mutex);
4006 	} else {
4007 		memset((void *)mqd, 0, sizeof(*mqd));
4008 		mutex_lock(&adev->srbm_mutex);
4009 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4010 		amdgpu_ring_init_mqd(ring);
4011 		gfx_v11_0_kiq_init_register(ring);
4012 		soc21_grbm_select(adev, 0, 0, 0, 0);
4013 		mutex_unlock(&adev->srbm_mutex);
4014 
4015 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4016 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4017 	}
4018 
4019 	return 0;
4020 }
4021 
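/* Initialize a compute queue (KCQ) MQD; the queue is mapped later through
 * the KIQ, so no HQD registers are written here.
 */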
4022 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4023 {
4024 	struct amdgpu_device *adev = ring->adev;
4025 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4026 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4027 
4028 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4029 		memset((void *)mqd, 0, sizeof(*mqd));
4030 		mutex_lock(&adev->srbm_mutex);
4031 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4032 		amdgpu_ring_init_mqd(ring);
4033 		soc21_grbm_select(adev, 0, 0, 0, 0);
4034 		mutex_unlock(&adev->srbm_mutex);
4035 
4036 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4037 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4038 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4039 		/* reset MQD to a clean status */
4040 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4041 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4042 
4043 		/* reset ring buffer */
4044 		ring->wptr = 0;
4045 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4046 		amdgpu_ring_clear_ring(ring);
4047 	} else {
4048 		amdgpu_ring_clear_ring(ring);
4049 	}
4050 
4051 	return 0;
4052 }
4053 
4054 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4055 {
4056 	struct amdgpu_ring *ring;
4057 	int r;
4058 
4059 	ring = &adev->gfx.kiq.ring;
4060 
4061 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4062 	if (unlikely(r != 0))
4063 		return r;
4064 
4065 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4066 	if (unlikely(r != 0)) {
4067 		amdgpu_bo_unreserve(ring->mqd_obj);
4068 		return r;
4069 	}
4070 
4071 	gfx_v11_0_kiq_init_queue(ring);
4072 	amdgpu_bo_kunmap(ring->mqd_obj);
4073 	ring->mqd_ptr = NULL;
4074 	amdgpu_bo_unreserve(ring->mqd_obj);
4075 	ring->sched.ready = true;
4076 	return 0;
4077 }
4078 
4079 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4080 {
4081 	struct amdgpu_ring *ring = NULL;
4082 	int r = 0, i;
4083 
4084 	if (!amdgpu_async_gfx_ring)
4085 		gfx_v11_0_cp_compute_enable(adev, true);
4086 
4087 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4088 		ring = &adev->gfx.compute_ring[i];
4089 
4090 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4091 		if (unlikely(r != 0))
4092 			goto done;
4093 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4094 		if (!r) {
4095 			r = gfx_v11_0_kcq_init_queue(ring);
4096 			amdgpu_bo_kunmap(ring->mqd_obj);
4097 			ring->mqd_ptr = NULL;
4098 		}
4099 		amdgpu_bo_unreserve(ring->mqd_obj);
4100 		if (r)
4101 			goto done;
4102 	}
4103 
4104 	r = amdgpu_gfx_enable_kcq(adev);
4105 done:
4106 	return r;
4107 }
4108 
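/* Bring up the CP: load the CP microcode for direct loading, set doorbell
 * ranges, resume the KIQ (or MES KIQ), the compute queues and the gfx
 * rings, then run a ring test on every ring.
 */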
4109 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4110 {
4111 	int r, i;
4112 	struct amdgpu_ring *ring;
4113 
4114 	if (!(adev->flags & AMD_IS_APU))
4115 		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4116 
4117 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4118 		/* legacy firmware loading */
4119 		r = gfx_v11_0_cp_gfx_load_microcode(adev);
4120 		if (r)
4121 			return r;
4122 
4123 		if (adev->gfx.rs64_enable)
4124 			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4125 		else
4126 			r = gfx_v11_0_cp_compute_load_microcode(adev);
4127 		if (r)
4128 			return r;
4129 	}
4130 
4131 	gfx_v11_0_cp_set_doorbell_range(adev);
4132 
4133 	if (amdgpu_async_gfx_ring) {
4134 		gfx_v11_0_cp_compute_enable(adev, true);
4135 		gfx_v11_0_cp_gfx_enable(adev, true);
4136 	}
4137 
4138 	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4139 		r = amdgpu_mes_kiq_hw_init(adev);
4140 	else
4141 		r = gfx_v11_0_kiq_resume(adev);
4142 	if (r)
4143 		return r;
4144 
4145 	r = gfx_v11_0_kcq_resume(adev);
4146 	if (r)
4147 		return r;
4148 
4149 	if (!amdgpu_async_gfx_ring) {
4150 		r = gfx_v11_0_cp_gfx_resume(adev);
4151 		if (r)
4152 			return r;
4153 	} else {
4154 		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4155 		if (r)
4156 			return r;
4157 	}
4158 
4159 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4160 		ring = &adev->gfx.gfx_ring[i];
4161 		r = amdgpu_ring_test_helper(ring);
4162 		if (r)
4163 			return r;
4164 	}
4165 
4166 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4167 		ring = &adev->gfx.compute_ring[i];
4168 		r = amdgpu_ring_test_helper(ring);
4169 		if (r)
4170 			return r;
4171 	}
4172 
4173 	return 0;
4174 }
4175 
4176 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4177 {
4178 	gfx_v11_0_cp_gfx_enable(adev, enable);
4179 	gfx_v11_0_cp_compute_enable(adev, enable);
4180 }
4181 
4182 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4183 {
4184 	int r;
4185 	bool value;
4186 
4187 	r = adev->gfxhub.funcs->gart_enable(adev);
4188 	if (r)
4189 		return r;
4190 
4191 	adev->hdp.funcs->flush_hdp(adev, NULL);
4192 
4193 	value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
4194 		false : true;
4195 
4196 	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4197 	amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
4198 
4199 	return 0;
4200 }
4201 
4202 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4203 {
4204 	u32 tmp;
4205 
4206 	/* select RS64 */
4207 	if (adev->gfx.rs64_enable) {
4208 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4209 		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4210 		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4211 
4212 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4213 		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4214 		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4215 	}
4216 
4217 	if (amdgpu_emu_mode == 1)
4218 		msleep(100);
4219 }
4220 
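/* Read GB_ADDR_CONFIG and decode its fields (pipes, packers, shader
 * engines, RBs per SE, pipe interleave size) into the gfx config.
 */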
4221 static int get_gb_addr_config(struct amdgpu_device *adev)
4222 {
4223 	u32 gb_addr_config;
4224 
4225 	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4226 	if (gb_addr_config == 0)
4227 		return -EINVAL;
4228 
4229 	adev->gfx.config.gb_addr_config_fields.num_pkrs =
4230 		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4231 
4232 	adev->gfx.config.gb_addr_config = gb_addr_config;
4233 
4234 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4235 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4236 				      GB_ADDR_CONFIG, NUM_PIPES);
4237 
4238 	adev->gfx.config.max_tile_pipes =
4239 		adev->gfx.config.gb_addr_config_fields.num_pipes;
4240 
4241 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4242 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4243 				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4244 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4245 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4246 				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
4247 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4248 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4249 				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4250 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4251 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4252 				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4253 
4254 	return 0;
4255 }
4256 
4257 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4258 {
4259 	uint32_t data;
4260 
4261 	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4262 	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4263 	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4264 
4265 	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4266 	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4267 	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4268 }
4269 
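/* hw_init: program or autoload the RLC/IMU firmware depending on the load
 * type, wait for autoload completion, enable the gfxhub, apply golden
 * registers and constants, then resume the RLC and the CP.
 */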
4270 static int gfx_v11_0_hw_init(void *handle)
4271 {
4272 	int r;
4273 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4274 
4275 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4276 		if (adev->gfx.imu.funcs) {
4277 			/* RLC autoload sequence 1: Program rlc ram */
4278 			if (adev->gfx.imu.funcs->program_rlc_ram)
4279 				adev->gfx.imu.funcs->program_rlc_ram(adev);
4280 		}
4281 		/* rlc autoload firmware */
4282 		r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4283 		if (r)
4284 			return r;
4285 	} else {
4286 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4287 			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4288 				if (adev->gfx.imu.funcs->load_microcode)
4289 					adev->gfx.imu.funcs->load_microcode(adev);
4290 				if (adev->gfx.imu.funcs->setup_imu)
4291 					adev->gfx.imu.funcs->setup_imu(adev);
4292 				if (adev->gfx.imu.funcs->start_imu)
4293 					adev->gfx.imu.funcs->start_imu(adev);
4294 			}
4295 
4296 			/* disable gpa mode in backdoor loading */
4297 			gfx_v11_0_disable_gpa_mode(adev);
4298 		}
4299 	}
4300 
4301 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4302 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4303 		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4304 		if (r) {
4305 			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4306 			return r;
4307 		}
4308 	}
4309 
4310 	adev->gfx.is_poweron = true;
4311 
4312 	if (get_gb_addr_config(adev))
4313 		DRM_WARN("Invalid gb_addr_config!\n");
4314 
4315 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4316 	    adev->gfx.rs64_enable)
4317 		gfx_v11_0_config_gfx_rs64(adev);
4318 
4319 	r = gfx_v11_0_gfxhub_enable(adev);
4320 	if (r)
4321 		return r;
4322 
4323 	if (!amdgpu_emu_mode)
4324 		gfx_v11_0_init_golden_registers(adev);
4325 
4326 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4327 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4328 		/*
4329 		 * For gfx 11, RLC firmware loading relies on the SMU firmware
4330 		 * being loaded first, so for direct loading the SMC ucode has
4331 		 * to be loaded here before the RLC.
4332 		 */
4333 		if (!(adev->flags & AMD_IS_APU)) {
4334 			r = amdgpu_pm_load_smu_firmware(adev, NULL);
4335 			if (r)
4336 				return r;
4337 		}
4338 	}
4339 
4340 	gfx_v11_0_constants_init(adev);
4341 
4342 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4343 		gfx_v11_0_select_cp_fw_arch(adev);
4344 
4345 	if (adev->nbio.funcs->gc_doorbell_init)
4346 		adev->nbio.funcs->gc_doorbell_init(adev);
4347 
4348 	r = gfx_v11_0_rlc_resume(adev);
4349 	if (r)
4350 		return r;
4351 
4352 	/*
4353 	 * init golden registers and rlc resume may override some registers,
4354 	 * reconfig them here
4355 	 */
4356 	gfx_v11_0_tcp_harvest(adev);
4357 
4358 	r = gfx_v11_0_cp_resume(adev);
4359 	if (r)
4360 		return r;
4361 
4362 	return r;
4363 }
4364 
4365 #ifndef BRING_UP_DEBUG
4366 static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
4367 {
4368 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4369 	struct amdgpu_ring *kiq_ring = &kiq->ring;
4370 	int i, r = 0;
4371 
4372 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4373 		return -EINVAL;
4374 
4375 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
4376 					adev->gfx.num_gfx_rings))
4377 		return -ENOMEM;
4378 
4379 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4380 		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
4381 					   PREEMPT_QUEUES, 0, 0);
4382 
4383 	if (adev->gfx.kiq.ring.sched.ready)
4384 		r = amdgpu_ring_test_helper(kiq_ring);
4385 
4386 	return r;
4387 }
4388 #endif
4389 
4390 static int gfx_v11_0_hw_fini(void *handle)
4391 {
4392 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4393 	int r;
4394 	uint32_t tmp;
4395 
4396 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4397 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4398 
4399 	if (!adev->no_hw_access) {
4400 #ifndef BRING_UP_DEBUG
4401 		if (amdgpu_async_gfx_ring) {
4402 			r = gfx_v11_0_kiq_disable_kgq(adev);
4403 			if (r)
4404 				DRM_ERROR("KGQ disable failed\n");
4405 		}
4406 #endif
4407 		if (amdgpu_gfx_disable_kcq(adev))
4408 			DRM_ERROR("KCQ disable failed\n");
4409 
4410 		amdgpu_mes_kiq_hw_fini(adev);
4411 	}
4412 
4413 	if (amdgpu_sriov_vf(adev)) {
4414 		gfx_v11_0_cp_gfx_enable(adev, false);
4415 		/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
4416 		tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
4417 		tmp &= 0xffffff00;
4418 		WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
4419 
4420 		return 0;
4421 	}
4422 	gfx_v11_0_cp_enable(adev, false);
4423 	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4424 
4425 	adev->gfxhub.funcs->gart_disable(adev);
4426 
4427 	adev->gfx.is_poweron = false;
4428 
4429 	return 0;
4430 }
4431 
4432 static int gfx_v11_0_suspend(void *handle)
4433 {
4434 	return gfx_v11_0_hw_fini(handle);
4435 }
4436 
4437 static int gfx_v11_0_resume(void *handle)
4438 {
4439 	return gfx_v11_0_hw_init(handle);
4440 }
4441 
4442 static bool gfx_v11_0_is_idle(void *handle)
4443 {
4444 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4445 
4446 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4447 				GRBM_STATUS, GUI_ACTIVE))
4448 		return false;
4449 	else
4450 		return true;
4451 }
4452 
4453 static int gfx_v11_0_wait_for_idle(void *handle)
4454 {
4455 	unsigned i;
4456 	u32 tmp;
4457 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4458 
4459 	for (i = 0; i < adev->usec_timeout; i++) {
4460 		/* read GRBM_STATUS */
4461 		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4462 			GRBM_STATUS__GUI_ACTIVE_MASK;
4463 
4464 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4465 			return 0;
4466 		udelay(1);
4467 	}
4468 	return -ETIMEDOUT;
4469 }
4470 
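/* Soft reset the CP: mask CP interrupts, enter RLC safe mode, request a
 * dequeue on every compute and gfx queue, reset the non-zero VMIDs, pulse
 * the GRBM soft reset bits for the CP blocks, then restore interrupts and
 * resume the CP.
 */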
4471 static int gfx_v11_0_soft_reset(void *handle)
4472 {
4473 	u32 grbm_soft_reset = 0;
4474 	u32 tmp;
4475 	int i, j, k;
4476 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4477 
4478 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4479 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4480 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4481 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4482 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4483 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4484 
4485 	gfx_v11_0_set_safe_mode(adev);
4486 
4487 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4488 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4489 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4490 				tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4491 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4492 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4493 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4494 				WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4495 
4496 				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4497 				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4498 			}
4499 		}
4500 	}
4501 	for (i = 0; i < adev->gfx.me.num_me; ++i) {
4502 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4503 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4504 				tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4505 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4506 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4507 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4508 				WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4509 
4510 				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4511 			}
4512 		}
4513 	}
4514 
4515 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4516 
4517 	/* Read the CP_VMID_RESET register three times to give
4518 	 * GFX_HQD_ACTIVE sufficient time to reach 0. */
4519 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4520 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4521 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4522 
4523 	for (i = 0; i < adev->usec_timeout; i++) {
4524 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4525 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4526 			break;
4527 		udelay(1);
4528 	}
4529 	if (i >= adev->usec_timeout) {
4530 		printk("Failed to wait all pipes clean\n");
4531 		return -EINVAL;
4532 	}
4533 
4534 	/**********  trigger soft reset  ***********/
4535 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4536 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4537 					SOFT_RESET_CP, 1);
4538 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4539 					SOFT_RESET_GFX, 1);
4540 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4541 					SOFT_RESET_CPF, 1);
4542 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4543 					SOFT_RESET_CPC, 1);
4544 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4545 					SOFT_RESET_CPG, 1);
4546 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4547 	/**********  exit soft reset  ***********/
4548 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4549 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4550 					SOFT_RESET_CP, 0);
4551 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4552 					SOFT_RESET_GFX, 0);
4553 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4554 					SOFT_RESET_CPF, 0);
4555 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4556 					SOFT_RESET_CPC, 0);
4557 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4558 					SOFT_RESET_CPG, 0);
4559 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4560 
4561 	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4562 	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4563 	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4564 
4565 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4566 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4567 
4568 	for (i = 0; i < adev->usec_timeout; i++) {
4569 		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4570 			break;
4571 		udelay(1);
4572 	}
4573 	if (i >= adev->usec_timeout) {
4574 		printk("Failed to wait CP_VMID_RESET to 0\n");
4575 		return -EINVAL;
4576 	}
4577 
4578 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4579 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4580 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4581 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4582 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4583 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4584 
4585 	gfx_v11_0_unset_safe_mode(adev);
4586 
4587 	return gfx_v11_0_cp_resume(adev);
4588 }
4589 
4590 static bool gfx_v11_0_check_soft_reset(void *handle)
4591 {
4592 	int i, r;
4593 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4594 	struct amdgpu_ring *ring;
4595 	long tmo = msecs_to_jiffies(1000);
4596 
4597 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4598 		ring = &adev->gfx.gfx_ring[i];
4599 		r = amdgpu_ring_test_ib(ring, tmo);
4600 		if (r)
4601 			return true;
4602 	}
4603 
4604 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4605 		ring = &adev->gfx.compute_ring[i];
4606 		r = amdgpu_ring_test_ib(ring, tmo);
4607 		if (r)
4608 			return true;
4609 	}
4610 
4611 	return false;
4612 }
4613 
4614 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4615 {
4616 	uint64_t clock;
4617 
4618 	amdgpu_gfx_off_ctrl(adev, false);
4619 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4620 	clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) |
4621 		((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL);
4622 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4623 	amdgpu_gfx_off_ctrl(adev, true);
4624 	return clock;
4625 }
4626 
4627 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4628 					   uint32_t vmid,
4629 					   uint32_t gds_base, uint32_t gds_size,
4630 					   uint32_t gws_base, uint32_t gws_size,
4631 					   uint32_t oa_base, uint32_t oa_size)
4632 {
4633 	struct amdgpu_device *adev = ring->adev;
4634 
4635 	/* GDS Base */
4636 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4637 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
4638 				    gds_base);
4639 
4640 	/* GDS Size */
4641 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4642 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
4643 				    gds_size);
4644 
4645 	/* GWS */
4646 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4647 				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
4648 				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4649 
4650 	/* OA */
4651 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4652 				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
4653 				    (1 << (oa_size + oa_base)) - (1 << oa_base));
4654 }
4655 
4656 static int gfx_v11_0_early_init(void *handle)
4657 {
4658 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4659 
4660 	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
4661 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4662 					  AMDGPU_MAX_COMPUTE_RINGS);
4663 
4664 	gfx_v11_0_set_kiq_pm4_funcs(adev);
4665 	gfx_v11_0_set_ring_funcs(adev);
4666 	gfx_v11_0_set_irq_funcs(adev);
4667 	gfx_v11_0_set_gds_init(adev);
4668 	gfx_v11_0_set_rlc_funcs(adev);
4669 	gfx_v11_0_set_mqd_funcs(adev);
4670 	gfx_v11_0_set_imu_funcs(adev);
4671 
4672 	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
4673 
4674 	return 0;
4675 }
4676 
4677 static int gfx_v11_0_ras_late_init(void *handle)
4678 {
4679 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4680 	struct ras_common_if *gfx_common_if;
4681 	int ret;
4682 
4683 	gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL);
4684 	if (!gfx_common_if)
4685 		return -ENOMEM;
4686 
4687 	gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX;
4688 
4689 	ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true);
4690 	if (ret)
4691 		dev_err(adev->dev, "Failed to enable gfx11 ras feature\n");
4692 
4693 	kfree(gfx_common_if);
4694 	return ret;
4695 }
4696 
4697 static int gfx_v11_0_late_init(void *handle)
4698 {
4699 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4700 	int r;
4701 
4702 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4703 	if (r)
4704 		return r;
4705 
4706 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4707 	if (r)
4708 		return r;
4709 
4710 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
4711 		r = gfx_v11_0_ras_late_init(handle);
4712 		if (r)
4713 			return r;
4714 	}
4715 
4716 	return 0;
4717 }
4718 
4719 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
4720 {
4721 	uint32_t rlc_cntl;
4722 
4723 	/* if RLC is not enabled, do nothing */
4724 	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
4725 	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
4726 }
4727 
4728 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
4729 {
4730 	uint32_t data;
4731 	unsigned i;
4732 
4733 	data = RLC_SAFE_MODE__CMD_MASK;
4734 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4735 
4736 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
4737 
4738 	/* wait for RLC_SAFE_MODE */
4739 	for (i = 0; i < adev->usec_timeout; i++) {
4740 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
4741 				   RLC_SAFE_MODE, CMD))
4742 			break;
4743 		udelay(1);
4744 	}
4745 }
4746 
4747 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev)
4748 {
4749 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
4750 }
4751 
4752 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
4753 				      bool enable)
4754 {
4755 	uint32_t def, data;
4756 
4757 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
4758 		return;
4759 
4760 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4761 
4762 	if (enable)
4763 		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4764 	else
4765 		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4766 
4767 	if (def != data)
4768 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4769 }
4770 
4771 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
4772 				       bool enable)
4773 {
4774 	uint32_t def, data;
4775 
4776 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4777 		return;
4778 
4779 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4780 
4781 	if (enable)
4782 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4783 	else
4784 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4785 
4786 	if (def != data)
4787 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4788 }
4789 
4790 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
4791 					   bool enable)
4792 {
4793 	uint32_t def, data;
4794 
4795 	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4796 		return;
4797 
4798 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4799 
4800 	if (enable)
4801 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4802 	else
4803 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4804 
4805 	if (def != data)
4806 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4807 }
4808 
4809 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4810 						       bool enable)
4811 {
4812 	uint32_t data, def;
4813 
4814 	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
4815 		return;
4816 
4817 	/* It is disabled by HW by default */
4818 	if (enable) {
4819 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4820 			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4821 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4822 
4823 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4824 				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4825 				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4826 
4827 			if (def != data)
4828 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4829 		}
4830 	} else {
4831 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4832 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4833 
4834 			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4835 				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4836 				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4837 
4838 			if (def != data)
4839 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4840 		}
4841 	}
4842 }
4843 
4844 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4845 						       bool enable)
4846 {
4847 	uint32_t def, data;
4848 
4849 	if (!(adev->cg_flags &
4850 	      (AMD_CG_SUPPORT_GFX_CGCG |
4851 	      AMD_CG_SUPPORT_GFX_CGLS |
4852 	      AMD_CG_SUPPORT_GFX_3D_CGCG |
4853 	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
4854 		return;
4855 
4856 	if (enable) {
4857 		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4858 
4859 		/* unset CGCG override */
4860 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4861 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4862 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4863 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4864 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
4865 		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4866 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4867 
4868 		/* update CGCG override bits */
4869 		if (def != data)
4870 			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4871 
4872 		/* enable cgcg FSM(0x0000363F) */
4873 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4874 
4875 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
4876 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
4877 			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4878 				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4879 		}
4880 
4881 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
4882 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
4883 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4884 				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4885 		}
4886 
4887 		if (def != data)
4888 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4889 
4890 		/* Program RLC_CGCG_CGLS_CTRL_3D */
4891 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4892 
4893 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
4894 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
4895 			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4896 				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4897 		}
4898 
4899 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
4900 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
4901 			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4902 				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4903 		}
4904 
4905 		if (def != data)
4906 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4907 
4908 		/* set IDLE_POLL_COUNT(0x00900100) */
4909 		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
4910 
4911 		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
4912 		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4913 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4914 
4915 		if (def != data)
4916 			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
4917 
4918 		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4919 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4920 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4921 		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4922 		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4923 		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
4924 
4925 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4926 		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4927 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4928 
4929 		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
4930 		if (adev->sdma.num_instances > 1) {
4931 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4932 			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4933 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4934 		}
4935 	} else {
4936 		/* Program RLC_CGCG_CGLS_CTRL */
4937 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4938 
4939 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4940 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4941 
4942 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4943 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4944 
4945 		if (def != data)
4946 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4947 
4948 		/* Program RLC_CGCG_CGLS_CTRL_3D */
4949 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4950 
4951 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4952 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4953 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4954 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4955 
4956 		if (def != data)
4957 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4958 
4959 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4960 		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4961 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4962 
4963 		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
4964 		if (adev->sdma.num_instances > 1) {
4965 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4966 			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4967 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4968 		}
4969 	}
4970 }
4971 
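/* Apply the gfx clock gating configuration under RLC safe mode: coarse and
 * medium grain gating, repeater/SRAM FGCG and the perfmon clock, then update
 * the GUI idle interrupt when any CGCG/CGLS/MGCG feature is supported.
 */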
4972 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4973 					    bool enable)
4974 {
4975 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4976 
4977 	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
4978 
4979 	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
4980 
4981 	gfx_v11_0_update_repeater_fgcg(adev, enable);
4982 
4983 	gfx_v11_0_update_sram_fgcg(adev, enable);
4984 
4985 	gfx_v11_0_update_perf_clk(adev, enable);
4986 
4987 	if (adev->cg_flags &
4988 	    (AMD_CG_SUPPORT_GFX_MGCG |
4989 	     AMD_CG_SUPPORT_GFX_CGLS |
4990 	     AMD_CG_SUPPORT_GFX_CGCG |
4991 	     AMD_CG_SUPPORT_GFX_3D_CGCG |
4992 	     AMD_CG_SUPPORT_GFX_3D_CGLS))
4993 		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
4994 
4995 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4996 
4997 	return 0;
4998 }
4999 
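/* Program the VMID used for RLC SPM streaming performance monitoring.
 * GFXOFF is disabled around the RLC_SPM_MC_CNTL access to keep it safe.
 */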
5000 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5001 {
5002 	u32 reg, data;
5003 
5004 	amdgpu_gfx_off_ctrl(adev, false);
5005 
5006 	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5007 	if (amdgpu_sriov_is_pp_one_vf(adev))
5008 		data = RREG32_NO_KIQ(reg);
5009 	else
5010 		data = RREG32(reg);
5011 
5012 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5013 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5014 
5015 	if (amdgpu_sriov_is_pp_one_vf(adev))
5016 		WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5017 	else
5018 		WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5019 
5020 	amdgpu_gfx_off_ctrl(adev, true);
5021 }
5022 
5023 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5024 	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5025 	.set_safe_mode = gfx_v11_0_set_safe_mode,
5026 	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
5027 	.init = gfx_v11_0_rlc_init,
5028 	.get_csb_size = gfx_v11_0_get_csb_size,
5029 	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
5030 	.resume = gfx_v11_0_rlc_resume,
5031 	.stop = gfx_v11_0_rlc_stop,
5032 	.reset = gfx_v11_0_rlc_reset,
5033 	.start = gfx_v11_0_rlc_start,
5034 	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
5035 };
5036 
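/* Enable or disable GFX power gating via RLC_PG_CNTL; when enabling, also
 * program the CGPG hysteresis delay for GC 11.0.1.
 */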
5037 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5038 {
5039 	u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5040 
5041 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5042 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5043 	else
5044 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5045 
5046 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5047 
5048 	/* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5049 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5050 		switch (adev->ip_versions[GC_HWIP][0]) {
5051 		case IP_VERSION(11, 0, 1):
5052 			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5053 			break;
5054 		default:
5055 			break;
5056 		}
5057 	}
5058 }
5059 
5060 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5061 {
5062 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5063 
5064 	gfx_v11_cntl_power_gating(adev, enable);
5065 
5066 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5067 }
5068 
5069 static int gfx_v11_0_set_powergating_state(void *handle,
5070 					   enum amd_powergating_state state)
5071 {
5072 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5073 	bool enable = (state == AMD_PG_STATE_GATE);
5074 
5075 	if (amdgpu_sriov_vf(adev))
5076 		return 0;
5077 
5078 	switch (adev->ip_versions[GC_HWIP][0]) {
5079 	case IP_VERSION(11, 0, 0):
5080 	case IP_VERSION(11, 0, 2):
5081 	case IP_VERSION(11, 0, 3):
5082 		amdgpu_gfx_off_ctrl(adev, enable);
5083 		break;
5084 	case IP_VERSION(11, 0, 1):
5085 		gfx_v11_cntl_pg(adev, enable);
5086 		amdgpu_gfx_off_ctrl(adev, enable);
5087 		break;
5088 	default:
5089 		break;
5090 	}
5091 
5092 	return 0;
5093 }
5094 
5095 static int gfx_v11_0_set_clockgating_state(void *handle,
5096 					  enum amd_clockgating_state state)
5097 {
5098 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5099 
5100 	if (amdgpu_sriov_vf(adev))
5101 		return 0;
5102 
5103 	switch (adev->ip_versions[GC_HWIP][0]) {
5104 	case IP_VERSION(11, 0, 0):
5105 	case IP_VERSION(11, 0, 1):
5106 	case IP_VERSION(11, 0, 2):
5107 	case IP_VERSION(11, 0, 3):
5108 		gfx_v11_0_update_gfx_clock_gating(adev,
5109 						  state == AMD_CG_STATE_GATE);
5110 		break;
5111 	default:
5112 		break;
5113 	}
5114 
5115 	return 0;
5116 }
5117 
5118 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5119 {
5120 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5121 	int data;
5122 
5123 	/* AMD_CG_SUPPORT_GFX_MGCG */
5124 	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5125 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5126 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5127 
5128 	/* AMD_CG_SUPPORT_REPEATER_FGCG */
5129 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5130 		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5131 
5132 	/* AMD_CG_SUPPORT_GFX_FGCG */
5133 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5134 		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
5135 
5136 	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
5137 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5138 		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5139 
5140 	/* AMD_CG_SUPPORT_GFX_CGCG */
5141 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5142 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5143 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5144 
5145 	/* AMD_CG_SUPPORT_GFX_CGLS */
5146 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5147 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5148 
5149 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5150 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5151 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5152 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5153 
5154 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5155 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5156 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5157 }
5158 
5159 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5160 {
5161 	/* gfx11 uses a 32-bit rptr */
5162 	return *(uint32_t *)ring->rptr_cpu_addr;
5163 }
5164 
5165 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5166 {
5167 	struct amdgpu_device *adev = ring->adev;
5168 	u64 wptr;
5169 
5170 	/* XXX check if swapping is necessary on BE */
5171 	if (ring->use_doorbell) {
5172 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5173 	} else {
5174 		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5175 		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5176 	}
5177 
5178 	return wptr;
5179 }
5180 
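/* For MES-managed queues, mirror the write pointer into the shadow area that
 * follows the MQD and ring the (aggregated) doorbell; otherwise use the ring
 * doorbell or the CP_RB0_WPTR registers directly.
 */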
5181 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5182 {
5183 	struct amdgpu_device *adev = ring->adev;
5184 	uint32_t *wptr_saved;
5185 	uint32_t *is_queue_unmap;
5186 	uint64_t aggregated_db_index;
5187 	uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
5188 	uint64_t wptr_tmp;
5189 
5190 	if (ring->is_mes_queue) {
5191 		wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5192 		is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5193 					      sizeof(uint32_t));
5194 		aggregated_db_index =
5195 			amdgpu_mes_get_aggregated_doorbell_index(adev,
5196 								 ring->hw_prio);
5197 
5198 		wptr_tmp = ring->wptr & ring->buf_mask;
5199 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5200 		*wptr_saved = wptr_tmp;
5201 		/* assume the doorbell is always used by the MES mapped queue */
5202 		if (*is_queue_unmap)
5203 			WDOORBELL64(aggregated_db_index, wptr_tmp);
5204 		WDOORBELL64(ring->doorbell_index, wptr_tmp);
5211 	} else {
5212 		if (ring->use_doorbell) {
5213 			/* XXX check if swapping is necessary on BE */
5214 			atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5215 				     ring->wptr);
5216 			WDOORBELL64(ring->doorbell_index, ring->wptr);
5217 		} else {
5218 			WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5219 				     lower_32_bits(ring->wptr));
5220 			WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5221 				     upper_32_bits(ring->wptr));
5222 		}
5223 	}
5224 }
5225 
5226 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5227 {
5228 	/* gfx11 hardware uses a 32-bit rptr */
5229 	return *(uint32_t *)ring->rptr_cpu_addr;
5230 }
5231 
5232 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5233 {
5234 	u64 wptr;
5235 
5236 	/* XXX check if swapping is necessary on BE */
5237 	if (ring->use_doorbell)
5238 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5239 	else
5240 		BUG();
5241 	return wptr;
5242 }
5243 
5244 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5245 {
5246 	struct amdgpu_device *adev = ring->adev;
5247 	uint32_t *wptr_saved;
5248 	uint32_t *is_queue_unmap;
5249 	uint64_t aggregated_db_index;
5250 	uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
5251 	uint64_t wptr_tmp;
5252 
5253 	if (ring->is_mes_queue) {
5254 		wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5255 		is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5256 					      sizeof(uint32_t));
5257 		aggregated_db_index =
5258 			amdgpu_mes_get_aggregated_doorbell_index(adev,
5259 								 ring->hw_prio);
5260 
5261 		wptr_tmp = ring->wptr & ring->buf_mask;
5262 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5263 		*wptr_saved = wptr_tmp;
5264 		/* assume the doorbell is always used by the MES mapped queue */
5265 		if (*is_queue_unmap)
5266 			WDOORBELL64(aggregated_db_index, wptr_tmp);
5267 		WDOORBELL64(ring->doorbell_index, wptr_tmp);
5274 	} else {
5275 		/* XXX check if swapping is necessary on BE */
5276 		if (ring->use_doorbell) {
5277 			atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5278 				     ring->wptr);
5279 			WDOORBELL64(ring->doorbell_index, ring->wptr);
5280 		} else {
5281 			BUG(); /* only DOORBELL method supported on gfx11 now */
5282 		}
5283 	}
5284 }
5285 
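/* Emit a WAIT_REG_MEM that triggers an HDP flush through the NBIO request
 * register and waits for the matching done bit for this ring's engine.
 */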
5286 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5287 {
5288 	struct amdgpu_device *adev = ring->adev;
5289 	u32 ref_and_mask, reg_mem_engine;
5290 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5291 
5292 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5293 		switch (ring->me) {
5294 		case 1:
5295 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5296 			break;
5297 		case 2:
5298 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5299 			break;
5300 		default:
5301 			return;
5302 		}
5303 		reg_mem_engine = 0;
5304 	} else {
5305 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5306 		reg_mem_engine = 1; /* pfp */
5307 	}
5308 
5309 	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5310 			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5311 			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5312 			       ref_and_mask, ref_and_mask, 0x20);
5313 }
5314 
5315 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5316 				       struct amdgpu_job *job,
5317 				       struct amdgpu_ib *ib,
5318 				       uint32_t flags)
5319 {
5320 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5321 	u32 header, control = 0;
5322 
5323 	BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5324 
5325 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5326 
5327 	control |= ib->length_dw | (vmid << 24);
5328 
5329 	if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5330 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5331 
5332 		if (flags & AMDGPU_IB_PREEMPTED)
5333 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5334 
5335 		if (vmid)
5336 			gfx_v11_0_ring_emit_de_meta(ring,
5337 				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
5338 	}
5339 
5340 	if (ring->is_mes_queue)
5341 		/* inherit vmid from mqd */
5342 		control |= 0x400000;
5343 
5344 	amdgpu_ring_write(ring, header);
5345 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5346 	amdgpu_ring_write(ring,
5347 #ifdef __BIG_ENDIAN
5348 		(2 << 0) |
5349 #endif
5350 		lower_32_bits(ib->gpu_addr));
5351 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5352 	amdgpu_ring_write(ring, control);
5353 }
5354 
5355 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5356 					   struct amdgpu_job *job,
5357 					   struct amdgpu_ib *ib,
5358 					   uint32_t flags)
5359 {
5360 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5361 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5362 
5363 	if (ring->is_mes_queue)
5364 		/* inherit vmid from mqd */
5365 		control |= 0x40000000;
5366 
5367 	/* Currently, there is a high possibility to get wave ID mismatch
5368 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5369 	 * different wave IDs than the GDS expects. This situation happens
5370 	 * randomly when at least 5 compute pipes use GDS ordered append.
5371 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5372 	 * Those are probably bugs somewhere else in the kernel driver.
5373 	 *
5374 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5375 	 * GDS to 0 for this ring (me/pipe).
5376 	 */
5377 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5378 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5379 		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5380 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5381 	}
5382 
5383 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5384 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5385 	amdgpu_ring_write(ring,
5386 #ifdef __BIG_ENDIAN
5387 				(2 << 0) |
5388 #endif
5389 				lower_32_bits(ib->gpu_addr));
5390 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5391 	amdgpu_ring_write(ring, control);
5392 }
5393 
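/* Emit a RELEASE_MEM fence: flush and invalidate the GL2/GL1/GLV/GLM caches,
 * write the sequence number to @addr and optionally raise an interrupt.
 */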
5394 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5395 				     u64 seq, unsigned flags)
5396 {
5397 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5398 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5399 
5400 	/* RELEASE_MEM - flush caches, send int */
5401 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5402 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5403 				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
5404 				 PACKET3_RELEASE_MEM_GCR_GL2_INV |
5405 				 PACKET3_RELEASE_MEM_GCR_GL2_US |
5406 				 PACKET3_RELEASE_MEM_GCR_GL1_INV |
5407 				 PACKET3_RELEASE_MEM_GCR_GLV_INV |
5408 				 PACKET3_RELEASE_MEM_GCR_GLM_INV |
5409 				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
5410 				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5411 				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5412 				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5413 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5414 				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5415 
5416 	/*
5417 	 * The address must be Qword aligned for a 64-bit write and Dword
5418 	 * aligned when only the low 32 bits are sent (the high data is discarded).
5419 	 */
5420 	if (write64bit)
5421 		BUG_ON(addr & 0x7);
5422 	else
5423 		BUG_ON(addr & 0x3);
5424 	amdgpu_ring_write(ring, lower_32_bits(addr));
5425 	amdgpu_ring_write(ring, upper_32_bits(addr));
5426 	amdgpu_ring_write(ring, lower_32_bits(seq));
5427 	amdgpu_ring_write(ring, upper_32_bits(seq));
5428 	amdgpu_ring_write(ring, ring->is_mes_queue ?
5429 			 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5430 }
5431 
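/* Wait in the CP for the ring's last synced fence value to appear in memory,
 * so previously submitted work has finished before later packets execute.
 */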
5432 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5433 {
5434 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5435 	uint32_t seq = ring->fence_drv.sync_seq;
5436 	uint64_t addr = ring->fence_drv.gpu_addr;
5437 
5438 	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5439 			       upper_32_bits(addr), seq, 0xffffffff, 4);
5440 }
5441 
5442 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5443 				   uint16_t pasid, uint32_t flush_type,
5444 				   bool all_hub, uint8_t dst_sel)
5445 {
5446 	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5447 	amdgpu_ring_write(ring,
5448 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5449 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5450 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5451 			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5452 }
5453 
5454 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5455 					 unsigned vmid, uint64_t pd_addr)
5456 {
5457 	if (ring->is_mes_queue)
5458 		gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5459 	else
5460 		amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5461 
5462 	/* compute doesn't have PFP */
5463 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5464 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5465 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5466 		amdgpu_ring_write(ring, 0x0);
5467 	}
5468 }
5469 
5470 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5471 					  u64 seq, unsigned int flags)
5472 {
5473 	struct amdgpu_device *adev = ring->adev;
5474 
5475 	/* we only allocate 32 bits for each seq writeback address */
5476 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5477 
5478 	/* write fence seq to the "addr" */
5479 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5480 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5481 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5482 	amdgpu_ring_write(ring, lower_32_bits(addr));
5483 	amdgpu_ring_write(ring, upper_32_bits(addr));
5484 	amdgpu_ring_write(ring, lower_32_bits(seq));
5485 
5486 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5487 		/* set register to trigger INT */
5488 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5489 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5490 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5491 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5492 		amdgpu_ring_write(ring, 0);
5493 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5494 	}
5495 }
5496 
5497 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5498 					 uint32_t flags)
5499 {
5500 	uint32_t dw2 = 0;
5501 
5502 	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5503 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5504 		/* set load_global_config & load_global_uconfig */
5505 		dw2 |= 0x8001;
5506 		/* set load_cs_sh_regs */
5507 		dw2 |= 0x01000000;
5508 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5509 		dw2 |= 0x10002;
5510 	}
5511 
5512 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5513 	amdgpu_ring_write(ring, dw2);
5514 	amdgpu_ring_write(ring, 0);
5515 }
5516 
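/* Emit a COND_EXEC packet and return the ring offset of its patchable size
 * DW; the DWs that follow are skipped when *cond_exe_gpu_addr is zero.
 */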
5517 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5518 {
5519 	unsigned ret;
5520 
5521 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5522 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5523 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5524 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5525 	ret = ring->wptr & ring->buf_mask;
5526 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5527 
5528 	return ret;
5529 }
5530 
5531 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5532 {
5533 	unsigned cur;
5534 	BUG_ON(offset > ring->buf_mask);
5535 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5536 
5537 	cur = (ring->wptr - 1) & ring->buf_mask;
5538 	if (likely(cur > offset))
5539 		ring->ring[offset] = cur - offset;
5540 	else
5541 		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
5542 }
5543 
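/* Preempt the IB running on @ring: clear the COND_EXEC condition, ask the
 * KIQ to preempt the queue without unmapping it, then poll the trailing
 * fence that signals the preemption has completed.
 */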
5544 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5545 {
5546 	int i, r = 0;
5547 	struct amdgpu_device *adev = ring->adev;
5548 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5549 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5550 	unsigned long flags;
5551 
5552 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5553 		return -EINVAL;
5554 
5555 	spin_lock_irqsave(&kiq->ring_lock, flags);
5556 
5557 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5558 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5559 		return -ENOMEM;
5560 	}
5561 
5562 	/* assert preemption condition */
5563 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5564 
5565 	/* assert IB preemption, emit the trailing fence */
5566 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5567 				   ring->trail_fence_gpu_addr,
5568 				   ++ring->trail_seq);
5569 	amdgpu_ring_commit(kiq_ring);
5570 
5571 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5572 
5573 	/* poll the trailing fence */
5574 	for (i = 0; i < adev->usec_timeout; i++) {
5575 		if (ring->trail_seq ==
5576 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
5577 			break;
5578 		udelay(1);
5579 	}
5580 
5581 	if (i >= adev->usec_timeout) {
5582 		r = -EINVAL;
5583 		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
5584 	}
5585 
5586 	/* deassert preemption condition */
5587 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5588 	return r;
5589 }
5590 
5591 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5592 {
5593 	struct amdgpu_device *adev = ring->adev;
5594 	struct v10_de_ib_state de_payload = {0};
5595 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5596 	void *de_payload_cpu_addr;
5597 	int cnt;
5598 
5599 	if (ring->is_mes_queue) {
5600 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5601 				  gfx[0].gfx_meta_data) +
5602 			offsetof(struct v10_gfx_meta_data, de_payload);
5603 		de_payload_gpu_addr =
5604 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5605 		de_payload_cpu_addr =
5606 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5607 
5608 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5609 				  gfx[0].gds_backup) +
5610 			offsetof(struct v10_gfx_meta_data, de_payload);
5611 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5612 	} else {
5613 		offset = offsetof(struct v10_gfx_meta_data, de_payload);
5614 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5615 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5616 
5617 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5618 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5619 				 PAGE_SIZE);
5620 	}
5621 
5622 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5623 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5624 
5625 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5626 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5627 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5628 				 WRITE_DATA_DST_SEL(8) |
5629 				 WR_CONFIRM) |
5630 				 WRITE_DATA_CACHE_POLICY(0));
5631 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5632 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5633 
5634 	if (resume)
5635 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5636 					   sizeof(de_payload) >> 2);
5637 	else
5638 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5639 					   sizeof(de_payload) >> 2);
5640 }
5641 
5642 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5643 				    bool secure)
5644 {
5645 	uint32_t v = secure ? FRAME_TMZ : 0;
5646 
5647 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5648 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5649 }
5650 
5651 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5652 				     uint32_t reg_val_offs)
5653 {
5654 	struct amdgpu_device *adev = ring->adev;
5655 
5656 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5657 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5658 				(5 << 8) |	/* dst: memory */
5659 				(1 << 20));	/* write confirm */
5660 	amdgpu_ring_write(ring, reg);
5661 	amdgpu_ring_write(ring, 0);
5662 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5663 				reg_val_offs * 4));
5664 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5665 				reg_val_offs * 4));
5666 }
5667 
5668 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5669 				   uint32_t val)
5670 {
5671 	uint32_t cmd = 0;
5672 
5673 	switch (ring->funcs->type) {
5674 	case AMDGPU_RING_TYPE_GFX:
5675 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5676 		break;
5677 	case AMDGPU_RING_TYPE_KIQ:
5678 		cmd = (1 << 16); /* no inc addr */
5679 		break;
5680 	default:
5681 		cmd = WR_CONFIRM;
5682 		break;
5683 	}
5684 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5685 	amdgpu_ring_write(ring, cmd);
5686 	amdgpu_ring_write(ring, reg);
5687 	amdgpu_ring_write(ring, 0);
5688 	amdgpu_ring_write(ring, val);
5689 }
5690 
5691 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5692 					uint32_t val, uint32_t mask)
5693 {
5694 	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5695 }
5696 
5697 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5698 						   uint32_t reg0, uint32_t reg1,
5699 						   uint32_t ref, uint32_t mask)
5700 {
5701 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5702 
5703 	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5704 			       ref, mask, 0x20);
5705 }
5706 
5707 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
5708 					 unsigned vmid)
5709 {
5710 	struct amdgpu_device *adev = ring->adev;
5711 	uint32_t value = 0;
5712 
5713 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5714 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5715 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5716 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5717 	WREG32_SOC15(GC, 0, regSQ_CMD, value);
5718 }
5719 
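/* Enable or disable the EOP time stamp and generic0 interrupts for a gfx
 * pipe on ME0 via the per-ring CP_INT_CNTL_RING* register.
 */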
5720 static void
5721 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5722 				      uint32_t me, uint32_t pipe,
5723 				      enum amdgpu_interrupt_state state)
5724 {
5725 	uint32_t cp_int_cntl, cp_int_cntl_reg;
5726 
5727 	if (!me) {
5728 		switch (pipe) {
5729 		case 0:
5730 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
5731 			break;
5732 		case 1:
5733 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
5734 			break;
5735 		default:
5736 			DRM_DEBUG("invalid pipe %d\n", pipe);
5737 			return;
5738 		}
5739 	} else {
5740 		DRM_DEBUG("invalid me %d\n", me);
5741 		return;
5742 	}
5743 
5744 	switch (state) {
5745 	case AMDGPU_IRQ_STATE_DISABLE:
5746 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5747 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5748 					    TIME_STAMP_INT_ENABLE, 0);
5749 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5750 					    GENERIC0_INT_ENABLE, 0);
5751 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5752 		break;
5753 	case AMDGPU_IRQ_STATE_ENABLE:
5754 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5755 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5756 					    TIME_STAMP_INT_ENABLE, 1);
5757 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5758 					    GENERIC0_INT_ENABLE, 1);
5759 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5760 		break;
5761 	default:
5762 		break;
5763 	}
5764 }
5765 
5766 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5767 						     int me, int pipe,
5768 						     enum amdgpu_interrupt_state state)
5769 {
5770 	u32 mec_int_cntl, mec_int_cntl_reg;
5771 
5772 	/*
5773 	 * amdgpu controls only the first MEC. That's why this function only
5774 	 * handles the setting of interrupts for this specific MEC. All other
5775 	 * pipes' interrupts are set by amdkfd.
5776 	 */
5777 
5778 	if (me == 1) {
5779 		switch (pipe) {
5780 		case 0:
5781 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5782 			break;
5783 		case 1:
5784 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
5785 			break;
5786 		case 2:
5787 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
5788 			break;
5789 		case 3:
5790 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
5791 			break;
5792 		default:
5793 			DRM_DEBUG("invalid pipe %d\n", pipe);
5794 			return;
5795 		}
5796 	} else {
5797 		DRM_DEBUG("invalid me %d\n", me);
5798 		return;
5799 	}
5800 
5801 	switch (state) {
5802 	case AMDGPU_IRQ_STATE_DISABLE:
5803 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5804 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5805 					     TIME_STAMP_INT_ENABLE, 0);
5806 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5807 					     GENERIC0_INT_ENABLE, 0);
5808 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5809 		break;
5810 	case AMDGPU_IRQ_STATE_ENABLE:
5811 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5812 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5813 					     TIME_STAMP_INT_ENABLE, 1);
5814 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5815 					     GENERIC0_INT_ENABLE, 1);
5816 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5817 		break;
5818 	default:
5819 		break;
5820 	}
5821 }
5822 
5823 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5824 					    struct amdgpu_irq_src *src,
5825 					    unsigned type,
5826 					    enum amdgpu_interrupt_state state)
5827 {
5828 	switch (type) {
5829 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5830 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
5831 		break;
5832 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
5833 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
5834 		break;
5835 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5836 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5837 		break;
5838 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5839 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5840 		break;
5841 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5842 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5843 		break;
5844 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5845 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5846 		break;
5847 	default:
5848 		break;
5849 	}
5850 	return 0;
5851 }
5852 
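/* CP EOP interrupt handler: for MES-managed queues look the queue up by id
 * and signal its fences, otherwise decode me/pipe/queue from ring_id and
 * signal fences on the matching gfx or compute ring.
 */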
5853 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
5854 			     struct amdgpu_irq_src *source,
5855 			     struct amdgpu_iv_entry *entry)
5856 {
5857 	int i;
5858 	u8 me_id, pipe_id, queue_id;
5859 	struct amdgpu_ring *ring;
5860 	uint32_t mes_queue_id = entry->src_data[0];
5861 
5862 	DRM_DEBUG("IH: CP EOP\n");
5863 
5864 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
5865 		struct amdgpu_mes_queue *queue;
5866 
5867 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
5868 
5869 		spin_lock(&adev->mes.queue_id_lock);
5870 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
5871 		if (queue) {
5872 			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
5873 			amdgpu_fence_process(queue->ring);
5874 		}
5875 		spin_unlock(&adev->mes.queue_id_lock);
5876 	} else {
5877 		me_id = (entry->ring_id & 0x0c) >> 2;
5878 		pipe_id = (entry->ring_id & 0x03) >> 0;
5879 		queue_id = (entry->ring_id & 0x70) >> 4;
5880 
5881 		switch (me_id) {
5882 		case 0:
5883 			if (pipe_id == 0)
5884 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5885 			else
5886 				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
5887 			break;
5888 		case 1:
5889 		case 2:
5890 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5891 				ring = &adev->gfx.compute_ring[i];
5892 				/* Per-queue interrupt is supported for MEC starting from VI.
5893 				 * The interrupt can only be enabled/disabled per pipe instead
5894 				 * of per queue.
5895 				 */
5896 				if ((ring->me == me_id) &&
5897 				    (ring->pipe == pipe_id) &&
5898 				    (ring->queue == queue_id))
5899 					amdgpu_fence_process(ring);
5900 			}
5901 			break;
5902 		}
5903 	}
5904 
5905 	return 0;
5906 }
5907 
5908 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5909 					      struct amdgpu_irq_src *source,
5910 					      unsigned type,
5911 					      enum amdgpu_interrupt_state state)
5912 {
5913 	switch (state) {
5914 	case AMDGPU_IRQ_STATE_DISABLE:
5915 	case AMDGPU_IRQ_STATE_ENABLE:
5916 		WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5917 			       PRIV_REG_INT_ENABLE,
5918 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5919 		break;
5920 	default:
5921 		break;
5922 	}
5923 
5924 	return 0;
5925 }
5926 
5927 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5928 					       struct amdgpu_irq_src *source,
5929 					       unsigned type,
5930 					       enum amdgpu_interrupt_state state)
5931 {
5932 	switch (state) {
5933 	case AMDGPU_IRQ_STATE_DISABLE:
5934 	case AMDGPU_IRQ_STATE_ENABLE:
5935 		WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5936 			       PRIV_INSTR_INT_ENABLE,
5937 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5938 		break;
5939 	default:
5940 		break;
5941 	}
5942 
5943 	return 0;
5944 }
5945 
5946 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
5947 					struct amdgpu_iv_entry *entry)
5948 {
5949 	u8 me_id, pipe_id, queue_id;
5950 	struct amdgpu_ring *ring;
5951 	int i;
5952 
5953 	me_id = (entry->ring_id & 0x0c) >> 2;
5954 	pipe_id = (entry->ring_id & 0x03) >> 0;
5955 	queue_id = (entry->ring_id & 0x70) >> 4;
5956 
5957 	switch (me_id) {
5958 	case 0:
5959 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
5960 			ring = &adev->gfx.gfx_ring[i];
5961 			/* we only enable 1 gfx queue per pipe for now */
5962 			if (ring->me == me_id && ring->pipe == pipe_id)
5963 				drm_sched_fault(&ring->sched);
5964 		}
5965 		break;
5966 	case 1:
5967 	case 2:
5968 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5969 			ring = &adev->gfx.compute_ring[i];
5970 			if (ring->me == me_id && ring->pipe == pipe_id &&
5971 			    ring->queue == queue_id)
5972 				drm_sched_fault(&ring->sched);
5973 		}
5974 		break;
5975 	default:
5976 		BUG();
5977 		break;
5978 	}
5979 }
5980 
5981 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
5982 				  struct amdgpu_irq_src *source,
5983 				  struct amdgpu_iv_entry *entry)
5984 {
5985 	DRM_ERROR("Illegal register access in command stream\n");
5986 	gfx_v11_0_handle_priv_fault(adev, entry);
5987 	return 0;
5988 }
5989 
5990 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
5991 				   struct amdgpu_irq_src *source,
5992 				   struct amdgpu_iv_entry *entry)
5993 {
5994 	DRM_ERROR("Illegal instruction in command stream\n");
5995 	gfx_v11_0_handle_priv_fault(adev, entry);
5996 	return 0;
5997 }
5998 
5999 #if 0
6000 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6001 					     struct amdgpu_irq_src *src,
6002 					     unsigned int type,
6003 					     enum amdgpu_interrupt_state state)
6004 {
6005 	uint32_t tmp, target;
6006 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6007 
6008 	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6009 	target += ring->pipe;
6010 
6011 	switch (type) {
6012 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6013 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
6014 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6015 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6016 					    GENERIC2_INT_ENABLE, 0);
6017 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6018 
6019 			tmp = RREG32_SOC15_IP(GC, target);
6020 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6021 					    GENERIC2_INT_ENABLE, 0);
6022 			WREG32_SOC15_IP(GC, target, tmp);
6023 		} else {
6024 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6025 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6026 					    GENERIC2_INT_ENABLE, 1);
6027 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6028 
6029 			tmp = RREG32_SOC15_IP(GC, target);
6030 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6031 					    GENERIC2_INT_ENABLE, 1);
6032 			WREG32_SOC15_IP(GC, target, tmp);
6033 		}
6034 		break;
6035 	default:
6036 		BUG(); /* kiq only support GENERIC2_INT now */
6037 		break;
6038 	}
6039 	return 0;
6040 }
6041 #endif
6042 
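/* Emit an ACQUIRE_MEM covering the full address range that writes back and
 * invalidates the GL2/GLM caches and invalidates GL1/GLV/GLK/GLI.
 */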
6043 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6044 {
6045 	const unsigned int gcr_cntl =
6046 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6047 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6048 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6049 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6050 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6051 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6052 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6053 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6054 
6055 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6056 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6057 	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6058 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6059 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6060 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6061 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6062 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6063 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6064 }
6065 
6066 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6067 	.name = "gfx_v11_0",
6068 	.early_init = gfx_v11_0_early_init,
6069 	.late_init = gfx_v11_0_late_init,
6070 	.sw_init = gfx_v11_0_sw_init,
6071 	.sw_fini = gfx_v11_0_sw_fini,
6072 	.hw_init = gfx_v11_0_hw_init,
6073 	.hw_fini = gfx_v11_0_hw_fini,
6074 	.suspend = gfx_v11_0_suspend,
6075 	.resume = gfx_v11_0_resume,
6076 	.is_idle = gfx_v11_0_is_idle,
6077 	.wait_for_idle = gfx_v11_0_wait_for_idle,
6078 	.soft_reset = gfx_v11_0_soft_reset,
6079 	.check_soft_reset = gfx_v11_0_check_soft_reset,
6080 	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
6081 	.set_powergating_state = gfx_v11_0_set_powergating_state,
6082 	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
6083 };
6084 
6085 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6086 	.type = AMDGPU_RING_TYPE_GFX,
6087 	.align_mask = 0xff,
6088 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6089 	.support_64bit_ptrs = true,
6090 	.secure_submission_supported = true,
6091 	.vmhub = AMDGPU_GFXHUB_0,
6092 	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6093 	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6094 	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6095 	.emit_frame_size = /* 242 DWs maximum in total if 16 IBs */
6096 		5 + /* COND_EXEC */
6097 		7 + /* PIPELINE_SYNC */
6098 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6099 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6100 		2 + /* VM_FLUSH */
6101 		8 + /* FENCE for VM_FLUSH */
6102 		20 + /* GDS switch */
6103 		5 + /* COND_EXEC */
6104 		7 + /* HDP_flush */
6105 		4 + /* VGT_flush */
6106 		31 + /*	DE_META */
6107 		3 + /* CNTX_CTRL */
6108 		5 + /* HDP_INVL */
6109 		8 + 8 + /* FENCE x2 */
6110 		8, /* gfx_v11_0_emit_mem_sync */
6111 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
6112 	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6113 	.emit_fence = gfx_v11_0_ring_emit_fence,
6114 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6115 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6116 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6117 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6118 	.test_ring = gfx_v11_0_ring_test_ring,
6119 	.test_ib = gfx_v11_0_ring_test_ib,
6120 	.insert_nop = amdgpu_ring_insert_nop,
6121 	.pad_ib = amdgpu_ring_generic_pad_ib,
6122 	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6123 	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6124 	.patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
6125 	.preempt_ib = gfx_v11_0_ring_preempt_ib,
6126 	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6127 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6128 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6129 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6130 	.soft_recovery = gfx_v11_0_ring_soft_recovery,
6131 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6132 };
6133 
6134 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6135 	.type = AMDGPU_RING_TYPE_COMPUTE,
6136 	.align_mask = 0xff,
6137 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6138 	.support_64bit_ptrs = true,
6139 	.vmhub = AMDGPU_GFXHUB_0,
6140 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6141 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6142 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6143 	.emit_frame_size =
6144 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6145 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6146 		5 + /* hdp invalidate */
6147 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6148 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6149 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6150 		2 + /* gfx_v11_0_ring_emit_vm_flush */
6151 		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6152 		8, /* gfx_v11_0_emit_mem_sync */
6153 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6154 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6155 	.emit_fence = gfx_v11_0_ring_emit_fence,
6156 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6157 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6158 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6159 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6160 	.test_ring = gfx_v11_0_ring_test_ring,
6161 	.test_ib = gfx_v11_0_ring_test_ib,
6162 	.insert_nop = amdgpu_ring_insert_nop,
6163 	.pad_ib = amdgpu_ring_generic_pad_ib,
6164 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6165 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6166 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6167 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6168 };
6169 
6170 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6171 	.type = AMDGPU_RING_TYPE_KIQ,
6172 	.align_mask = 0xff,
6173 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6174 	.support_64bit_ptrs = true,
6175 	.vmhub = AMDGPU_GFXHUB_0,
6176 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6177 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6178 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6179 	.emit_frame_size =
6180 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6181 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6182 		5 + /* hdp invalidate */
6183 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6184 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6185 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6186 		2 + /* gfx_v11_0_ring_emit_vm_flush */
6187 		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6188 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6189 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6190 	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6191 	.test_ring = gfx_v11_0_ring_test_ring,
6192 	.test_ib = gfx_v11_0_ring_test_ib,
6193 	.insert_nop = amdgpu_ring_insert_nop,
6194 	.pad_ib = amdgpu_ring_generic_pad_ib,
6195 	.emit_rreg = gfx_v11_0_ring_emit_rreg,
6196 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6197 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6198 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6199 };
6200 
6201 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6202 {
6203 	int i;
6204 
6205 	adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6206 
6207 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6208 		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6209 
6210 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6211 		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6212 }
6213 
6214 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6215 	.set = gfx_v11_0_set_eop_interrupt_state,
6216 	.process = gfx_v11_0_eop_irq,
6217 };
6218 
6219 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6220 	.set = gfx_v11_0_set_priv_reg_fault_state,
6221 	.process = gfx_v11_0_priv_reg_irq,
6222 };
6223 
6224 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
6225 	.set = gfx_v11_0_set_priv_inst_fault_state,
6226 	.process = gfx_v11_0_priv_inst_irq,
6227 };
6228 
6229 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
6230 {
6231 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6232 	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
6233 
6234 	adev->gfx.priv_reg_irq.num_types = 1;
6235 	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
6236 
6237 	adev->gfx.priv_inst_irq.num_types = 1;
6238 	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
6239 }
6240 
6241 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
6242 {
6243 	if (adev->flags & AMD_IS_APU)
6244 		adev->gfx.imu.mode = MISSION_MODE;
6245 	else
6246 		adev->gfx.imu.mode = DEBUG_MODE;
6247 
6248 	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
6249 }
6250 
6251 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
6252 {
6253 	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
6254 }
6255 
6256 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
6257 {
6258 	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
6259 			    adev->gfx.config.max_sh_per_se *
6260 			    adev->gfx.config.max_shader_engines;
6261 
6262 	adev->gds.gds_size = 0x1000;
6263 	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
6264 	adev->gds.gws_size = 64;
6265 	adev->gds.oa_size = 16;
6266 }
6267 
6268 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
6269 {
6270 	/* set gfx eng mqd */
6271 	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
6272 		sizeof(struct v11_gfx_mqd);
6273 	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
6274 		gfx_v11_0_gfx_mqd_init;
6275 	/* set compute eng mqd */
6276 	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
6277 		sizeof(struct v11_compute_mqd);
6278 	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
6279 		gfx_v11_0_compute_mqd_init;
6280 }
6281 
6282 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
6283 							  u32 bitmap)
6284 {
6285 	u32 data;
6286 
6287 	if (!bitmap)
6288 		return;
6289 
6290 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6291 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6292 
6293 	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
6294 }
6295 
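/* Return the bitmap of active WGPs for the currently selected SE/SH by
 * combining the harvest and user inactive-WGP masks and inverting them.
 */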
6296 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
6297 {
6298 	u32 data, wgp_bitmask;
6299 	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
6300 	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
6301 
6302 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6303 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6304 
6305 	wgp_bitmask =
6306 		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6307 
6308 	return (~data) & wgp_bitmask;
6309 }
6310 
6311 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
6312 {
6313 	u32 wgp_idx, wgp_active_bitmap;
6314 	u32 cu_bitmap_per_wgp, cu_active_bitmap;
6315 
6316 	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
6317 	cu_active_bitmap = 0;
6318 
6319 	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
6320 		/* each enabled WGP corresponds to 2 enabled CUs */
6321 		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
6322 		if (wgp_active_bitmap & (1 << wgp_idx))
6323 			cu_active_bitmap |= cu_bitmap_per_wgp;
6324 	}
6325 
6326 	return cu_active_bitmap;
6327 }
6328 
6329 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
6330 				 struct amdgpu_cu_info *cu_info)
6331 {
6332 	int i, j, k, counter, active_cu_number = 0;
6333 	u32 mask, bitmap;
6334 	unsigned disable_masks[8 * 2];
6335 
6336 	if (!adev || !cu_info)
6337 		return -EINVAL;
6338 
6339 	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
6340 
6341 	mutex_lock(&adev->grbm_idx_mutex);
6342 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6343 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6344 			mask = 1;
6345 			counter = 0;
6346 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
6347 			if (i < 8 && j < 2)
6348 				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
6349 					adev, disable_masks[i * 2 + j]);
6350 			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
6351 
6352 			/**
6353 			 * GFX11 could support more than 4 SEs, while the bitmap
6354 			 * in the cu_info struct is 4x4 and the ioctl interface struct
6355 			 * drm_amdgpu_info_device must stay stable.
6356 			 * So we use last two columns of bitmap to store cu mask for
6357 			 * SEs 4 to 7, the layout of the bitmap is as below:
6358 			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
6359 			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
6360 			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
6361 			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
6362 			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
6363 			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
6364 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
6365 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
6366 			 */
6367 			cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
6368 
6369 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6370 				if (bitmap & mask)
6371 					counter++;
6372 
6373 				mask <<= 1;
6374 			}
6375 			active_cu_number += counter;
6376 		}
6377 	}
6378 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6379 	mutex_unlock(&adev->grbm_idx_mutex);
6380 
6381 	cu_info->number = active_cu_number;
6382 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6383 
6384 	return 0;
6385 }
6386 
6387 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
6388 {
6389 	.type = AMD_IP_BLOCK_TYPE_GFX,
6390 	.major = 11,
6391 	.minor = 0,
6392 	.rev = 0,
6393 	.funcs = &gfx_v11_0_ip_funcs,
6394 };
6395