xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c (revision 6246ed09111fbb17168619006b4380103c6673c3)
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v11_0.h"
34 #include "soc21.h"
35 #include "nvd.h"
36 
37 #include "gc/gc_11_0_0_offset.h"
38 #include "gc/gc_11_0_0_sh_mask.h"
39 #include "smuio/smuio_13_0_6_offset.h"
40 #include "smuio/smuio_13_0_6_sh_mask.h"
41 #include "navi10_enum.h"
42 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
43 
44 #include "soc15.h"
45 #include "soc15d.h"
46 #include "clearstate_gfx11.h"
47 #include "v11_structs.h"
48 #include "gfx_v11_0.h"
49 #include "nbio_v4_3.h"
50 #include "mes_v11_0.h"
51 
52 #define GFX11_NUM_GFX_RINGS		1
53 #define GFX11_MEC_HPD_SIZE	2048
54 
55 #define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
56 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388
57 
58 #define regCGTT_WD_CLK_CTRL		0x5086
59 #define regCGTT_WD_CLK_CTRL_BASE_IDX	1
60 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1	0x4e7e
61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
62 
63 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
64 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
65 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
68 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
69 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
70 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
71 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
72 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
73 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
74 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
75 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
76 
77 static const struct soc15_reg_golden golden_settings_gc_11_0[] =
78 {
79 	/* Pending on emulation bring up */
80 };
81 
82 static const struct soc15_reg_golden golden_settings_gc_11_0_0[] =
83 {
84 	/* Pending on emulation bring up */
85 };
86 
87 static const struct soc15_reg_golden golden_settings_gc_rlc_spm_11_0[] =
88 {
89 	/* Pending on emulation bring up */
90 };
91 
92 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
93 {
94 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
95 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
96 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
97 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
98 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
99 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
100 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
101 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
102 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
103 };
104 
105 #define DEFAULT_SH_MEM_CONFIG \
106 	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
107 	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
108 	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
109 
110 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
111 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
112 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
113 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
114 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
115 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
116 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
117 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
118                                  struct amdgpu_cu_info *cu_info);
119 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
120 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
121 				   u32 sh_num, u32 instance);
122 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
123 
124 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
125 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
126 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
127 				     uint32_t val);
128 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
129 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
130 					   uint16_t pasid, uint32_t flush_type,
131 					   bool all_hub, uint8_t dst_sel);
132 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
133 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
134 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
135 				      bool enable);
136 
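/*
 * Emit a SET_RESOURCES packet on the KIQ ring.  Going by the packet field
 * names, this hands the KIQ firmware the bitmask of hardware queues it may
 * manage; vmid_mask and queue_type stay 0 (KIQ itself), and the gws/oac/gds
 * fields are unused here.
 */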
137 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
138 {
139 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
140 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
141 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
142 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
143 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
144 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
145 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
146 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
147 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
148 }
149 
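/*
 * Ask the KIQ to map a ring's MQD onto a hardware queue slot.  The ME and
 * engine select are derived from the ring type (compute -> ME1/engine 0,
 * gfx -> ME0/engine 4, MES -> ME2/engine 5); the packet then carries the
 * doorbell offset, the MQD GPU address and the wptr GPU address.
 */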
150 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
151 				 struct amdgpu_ring *ring)
152 {
153 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
154 	uint64_t wptr_addr = ring->wptr_gpu_addr;
155 	uint32_t me = 0, eng_sel = 0;
156 
157 	switch (ring->funcs->type) {
158 	case AMDGPU_RING_TYPE_COMPUTE:
159 		me = 1;
160 		eng_sel = 0;
161 		break;
162 	case AMDGPU_RING_TYPE_GFX:
163 		me = 0;
164 		eng_sel = 4;
165 		break;
166 	case AMDGPU_RING_TYPE_MES:
167 		me = 2;
168 		eng_sel = 5;
169 		break;
170 	default:
171 		WARN_ON(1);
172 	}
173 
174 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
 175 	/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
 176 	amdgpu_ring_write(kiq_ring,
177 			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
178 			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
179 			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
180 			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
 181 			  PACKET3_MAP_QUEUES_ME(me) |
182 			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
183 			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
184 			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
185 			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
186 	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
187 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
188 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
189 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
190 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
191 }
192 
193 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
194 				   struct amdgpu_ring *ring,
195 				   enum amdgpu_unmap_queues_action action,
196 				   u64 gpu_addr, u64 seq)
197 {
198 	struct amdgpu_device *adev = kiq_ring->adev;
199 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
200 
201 	if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
202 		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
203 		return;
204 	}
205 
206 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
207 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
208 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
209 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
210 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
211 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
212 	amdgpu_ring_write(kiq_ring,
213 		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
214 
215 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
216 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
217 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
218 		amdgpu_ring_write(kiq_ring, seq);
219 	} else {
220 		amdgpu_ring_write(kiq_ring, 0);
221 		amdgpu_ring_write(kiq_ring, 0);
222 		amdgpu_ring_write(kiq_ring, 0);
223 	}
224 }
225 
226 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
227 				   struct amdgpu_ring *ring,
228 				   u64 addr,
229 				   u64 seq)
230 {
231 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
232 
233 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
234 	amdgpu_ring_write(kiq_ring,
235 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
236 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
237 			  PACKET3_QUERY_STATUS_COMMAND(2));
 238 	amdgpu_ring_write(kiq_ring, /* doorbell offset, eng_sel */
239 			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
240 			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
241 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
242 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
243 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
244 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
245 }
246 
247 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
248 				uint16_t pasid, uint32_t flush_type,
249 				bool all_hub)
250 {
251 	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
252 }
253 
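/*
 * KIQ PM4 helpers for gfx11.  The *_size fields give the dword footprint of
 * each packet (header included) and must match what the emit functions above
 * actually write, so callers can reserve the right amount of ring space.
 */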
254 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
255 	.kiq_set_resources = gfx11_kiq_set_resources,
256 	.kiq_map_queues = gfx11_kiq_map_queues,
257 	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
258 	.kiq_query_status = gfx11_kiq_query_status,
259 	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
260 	.set_resources_size = 8,
261 	.map_queues_size = 7,
262 	.unmap_queues_size = 6,
263 	.query_status_size = 7,
264 	.invalidate_tlbs_size = 2,
265 };
266 
267 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
268 {
269 	adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
270 }
271 
272 static void gfx_v11_0_init_spm_golden_registers(struct amdgpu_device *adev)
273 {
274 	switch (adev->ip_versions[GC_HWIP][0]) {
275 	case IP_VERSION(11, 0, 0):
276 		soc15_program_register_sequence(adev,
277 						golden_settings_gc_rlc_spm_11_0,
278 						(const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_11_0));
279 		break;
280 	default:
281 		break;
282 	}
283 }
284 
285 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
286 {
287 	switch (adev->ip_versions[GC_HWIP][0]) {
288 	case IP_VERSION(11, 0, 0):
289 		soc15_program_register_sequence(adev,
290 						golden_settings_gc_11_0,
291 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
292 		soc15_program_register_sequence(adev,
293 						golden_settings_gc_11_0_0,
294 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_0));
295 		break;
296 	case IP_VERSION(11, 0, 1):
297 		soc15_program_register_sequence(adev,
298 						golden_settings_gc_11_0,
299 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
300 		soc15_program_register_sequence(adev,
301 						golden_settings_gc_11_0_1,
302 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
303 		break;
304 	default:
305 		break;
306 	}
307 	gfx_v11_0_init_spm_golden_registers(adev);
308 }
309 
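/*
 * Small PM4 helpers: gfx_v11_0_write_data_to_reg() emits a WRITE_DATA packet
 * that writes @val to register @reg (optionally with write confirmation), and
 * gfx_v11_0_wait_reg_mem() emits a WAIT_REG_MEM packet that polls a register
 * or memory location until (value & mask) == ref.
 */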
310 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
311 				       bool wc, uint32_t reg, uint32_t val)
312 {
313 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
314 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
315 			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
316 	amdgpu_ring_write(ring, reg);
317 	amdgpu_ring_write(ring, 0);
318 	amdgpu_ring_write(ring, val);
319 }
320 
321 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
322 				  int mem_space, int opt, uint32_t addr0,
323 				  uint32_t addr1, uint32_t ref, uint32_t mask,
324 				  uint32_t inv)
325 {
326 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
327 	amdgpu_ring_write(ring,
328 			  /* memory (1) or register (0) */
329 			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
330 			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
331 			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
332 			   WAIT_REG_MEM_ENGINE(eng_sel)));
333 
334 	if (mem_space)
335 		BUG_ON(addr0 & 0x3); /* Dword align */
336 	amdgpu_ring_write(ring, addr0);
337 	amdgpu_ring_write(ring, addr1);
338 	amdgpu_ring_write(ring, ref);
339 	amdgpu_ring_write(ring, mask);
340 	amdgpu_ring_write(ring, inv); /* poll interval */
341 }
342 
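/*
 * Basic ring liveness test: seed SCRATCH_REG0 with 0xCAFEDEAD, emit a packet
 * that writes 0xDEADBEEF to it (WREG path on the KIQ, SET_UCONFIG_REG on the
 * other rings), then poll the register until the new value shows up or
 * usec_timeout expires.
 */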
343 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
344 {
345 	struct amdgpu_device *adev = ring->adev;
346 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
347 	uint32_t tmp = 0;
348 	unsigned i;
349 	int r;
350 
351 	WREG32(scratch, 0xCAFEDEAD);
352 	r = amdgpu_ring_alloc(ring, 5);
353 	if (r) {
354 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
355 			  ring->idx, r);
356 		return r;
357 	}
358 
359 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
360 		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
361 	} else {
362 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
363 		amdgpu_ring_write(ring, scratch -
364 				  PACKET3_SET_UCONFIG_REG_START);
365 		amdgpu_ring_write(ring, 0xDEADBEEF);
366 	}
367 	amdgpu_ring_commit(ring);
368 
369 	for (i = 0; i < adev->usec_timeout; i++) {
370 		tmp = RREG32(scratch);
371 		if (tmp == 0xDEADBEEF)
372 			break;
373 		if (amdgpu_emu_mode == 1)
374 			msleep(1);
375 		else
376 			udelay(1);
377 	}
378 
379 	if (i >= adev->usec_timeout)
380 		r = -ETIMEDOUT;
381 	return r;
382 }
383 
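/*
 * Indirect buffer test: submit a tiny IB whose WRITE_DATA packet stores
 * 0xDEADBEEF to a writeback slot (or to MES context memory for MES queues),
 * then wait on the fence and check that the value landed.  Skipped on the
 * KIQ when MES KIQ firmware is in use, as noted below.
 */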
384 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
385 {
386 	struct amdgpu_device *adev = ring->adev;
387 	struct amdgpu_ib ib;
388 	struct dma_fence *f = NULL;
389 	unsigned index;
390 	uint64_t gpu_addr;
391 	volatile uint32_t *cpu_ptr;
392 	long r;
393 
 394 	/* MES KIQ fw doesn't have indirect buffer support for now */
395 	if (adev->enable_mes_kiq &&
396 	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
397 		return 0;
398 
399 	memset(&ib, 0, sizeof(ib));
400 
401 	if (ring->is_mes_queue) {
402 		uint32_t padding, offset;
403 
404 		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
405 		padding = amdgpu_mes_ctx_get_offs(ring,
406 						  AMDGPU_MES_CTX_PADDING_OFFS);
407 
408 		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
409 		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
410 
411 		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
412 		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
413 		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
414 	} else {
415 		r = amdgpu_device_wb_get(adev, &index);
416 		if (r)
417 			return r;
418 
419 		gpu_addr = adev->wb.gpu_addr + (index * 4);
420 		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
421 		cpu_ptr = &adev->wb.wb[index];
422 
423 		r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
424 		if (r) {
425 			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
426 			goto err1;
427 		}
428 	}
429 
430 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
431 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
432 	ib.ptr[2] = lower_32_bits(gpu_addr);
433 	ib.ptr[3] = upper_32_bits(gpu_addr);
434 	ib.ptr[4] = 0xDEADBEEF;
435 	ib.length_dw = 5;
436 
437 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
438 	if (r)
439 		goto err2;
440 
441 	r = dma_fence_wait_timeout(f, false, timeout);
442 	if (r == 0) {
443 		r = -ETIMEDOUT;
444 		goto err2;
445 	} else if (r < 0) {
446 		goto err2;
447 	}
448 
449 	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
450 		r = 0;
451 	else
452 		r = -EINVAL;
453 err2:
454 	if (!ring->is_mes_queue)
455 		amdgpu_ib_free(adev, &ib, NULL);
456 	dma_fence_put(f);
457 err1:
458 	if (!ring->is_mes_queue)
459 		amdgpu_device_wb_free(adev, index);
460 	return r;
461 }
462 
463 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
464 {
465 	release_firmware(adev->gfx.pfp_fw);
466 	adev->gfx.pfp_fw = NULL;
467 	release_firmware(adev->gfx.me_fw);
468 	adev->gfx.me_fw = NULL;
469 	release_firmware(adev->gfx.rlc_fw);
470 	adev->gfx.rlc_fw = NULL;
471 	release_firmware(adev->gfx.mec_fw);
472 	adev->gfx.mec_fw = NULL;
473 
474 	kfree(adev->gfx.rlc.register_list_format);
475 }
476 
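/*
 * Fetch and validate the CP/RLC firmware images
 * (amdgpu/<prefix>_{pfp,me,rlc,mec}.bin).  The PFP header version (2.0)
 * decides whether the RS64 micro-engine layout is used; for PSP front-door
 * loading the images (and, for RS64, their per-pipe stack sections) are also
 * registered in adev->firmware.
 */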
477 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
478 {
479 	char fw_name[40];
480 	char ucode_prefix[30];
481 	int err;
482 	struct amdgpu_firmware_info *info = NULL;
483 	const struct common_firmware_header *header = NULL;
484 	const struct gfx_firmware_header_v1_0 *cp_hdr;
485 	const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
486 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
487 	uint16_t version_major;
488 	uint16_t version_minor;
489 
490 	DRM_DEBUG("\n");
491 
492 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
493 
494 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
495 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
496 	if (err)
497 		goto out;
498 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
499 	if (err)
500 		goto out;
 501 	/* check pfp fw hdr version to decide whether to enable rs64 for gfx11 */
502 	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
503 				(union amdgpu_firmware_header *)
504 				adev->gfx.pfp_fw->data, 2, 0);
505 	if (adev->gfx.rs64_enable) {
506 		dev_info(adev->dev, "CP RS64 enable\n");
507 		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
508 		adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
509 		adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
510 
511 	} else {
512 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
513 		adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
514 		adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
515 	}
516 
517 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
518 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
519 	if (err)
520 		goto out;
521 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
522 	if (err)
523 		goto out;
524 	if (adev->gfx.rs64_enable) {
525 		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
526 		adev->gfx.me_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
527 		adev->gfx.me_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
528 
529 	} else {
530 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
531 		adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
532 		adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
533 	}
534 
535 	if (!amdgpu_sriov_vf(adev)) {
536 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
537 		err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
538 		if (err)
539 			goto out;
540 		err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
541 		if (err)
542 			goto out;
543 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
544 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
545 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
546 		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
547 		if (err)
548 			goto out;
549 	}
550 
551 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
552 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
553 	if (err)
554 		goto out;
555 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
556 	if (err)
557 		goto out;
558 	if (adev->gfx.rs64_enable) {
559 		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
560 		adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
561 		adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
562 
563 	} else {
564 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
565 		adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
566 		adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
567 	}
568 
569 	/* only one MEC for gfx 11.0.0. */
570 	adev->gfx.mec2_fw = NULL;
571 
572 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
573 		if (adev->gfx.rs64_enable) {
574 			cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
575 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP];
576 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP;
577 			info->fw = adev->gfx.pfp_fw;
578 			header = (const struct common_firmware_header *)info->fw->data;
579 			adev->firmware.fw_size +=
580 				ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
581 
582 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK];
583 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK;
584 			info->fw = adev->gfx.pfp_fw;
585 			header = (const struct common_firmware_header *)info->fw->data;
586 			adev->firmware.fw_size +=
587 				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
588 
589 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK];
590 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK;
591 			info->fw = adev->gfx.pfp_fw;
592 			header = (const struct common_firmware_header *)info->fw->data;
593 			adev->firmware.fw_size +=
594 				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
595 
596 			cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
597 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME];
598 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME;
599 			info->fw = adev->gfx.me_fw;
600 			header = (const struct common_firmware_header *)info->fw->data;
601 			adev->firmware.fw_size +=
602 				ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
603 
604 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK];
605 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK;
606 			info->fw = adev->gfx.me_fw;
607 			header = (const struct common_firmware_header *)info->fw->data;
608 			adev->firmware.fw_size +=
609 				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
610 
611 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK];
612 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK;
613 			info->fw = adev->gfx.me_fw;
614 			header = (const struct common_firmware_header *)info->fw->data;
615 			adev->firmware.fw_size +=
616 				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
617 
618 			cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
619 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC];
620 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC;
621 			info->fw = adev->gfx.mec_fw;
622 			header = (const struct common_firmware_header *)info->fw->data;
623 			adev->firmware.fw_size +=
624 				ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
625 
626 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK];
627 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK;
628 			info->fw = adev->gfx.mec_fw;
629 			header = (const struct common_firmware_header *)info->fw->data;
630 			adev->firmware.fw_size +=
631 				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
632 
633 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK];
634 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK;
635 			info->fw = adev->gfx.mec_fw;
636 			header = (const struct common_firmware_header *)info->fw->data;
637 			adev->firmware.fw_size +=
638 				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
639 
640 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK];
641 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK;
642 			info->fw = adev->gfx.mec_fw;
643 			header = (const struct common_firmware_header *)info->fw->data;
644 			adev->firmware.fw_size +=
645 				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
646 
647 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK];
648 			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK;
649 			info->fw = adev->gfx.mec_fw;
650 			header = (const struct common_firmware_header *)info->fw->data;
651 			adev->firmware.fw_size +=
652 				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
653 		} else {
654 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
655 			info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
656 			info->fw = adev->gfx.pfp_fw;
657 			header = (const struct common_firmware_header *)info->fw->data;
658 			adev->firmware.fw_size +=
659 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
660 
661 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
662 			info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
663 			info->fw = adev->gfx.me_fw;
664 			header = (const struct common_firmware_header *)info->fw->data;
665 			adev->firmware.fw_size +=
666 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
667 
668 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
669 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
670 			info->fw = adev->gfx.mec_fw;
671 			header = (const struct common_firmware_header *)info->fw->data;
672 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
673 			adev->firmware.fw_size +=
674 				ALIGN(le32_to_cpu(header->ucode_size_bytes) -
675 				      le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
676 
677 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
678 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
679 			info->fw = adev->gfx.mec_fw;
680 			adev->firmware.fw_size +=
681 				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
682 		}
683 	}
684 
685 out:
686 	if (err) {
687 		dev_err(adev->dev,
688 			"gfx11: Failed to load firmware \"%s\"\n",
689 			fw_name);
690 		release_firmware(adev->gfx.pfp_fw);
691 		adev->gfx.pfp_fw = NULL;
692 		release_firmware(adev->gfx.me_fw);
693 		adev->gfx.me_fw = NULL;
694 		release_firmware(adev->gfx.rlc_fw);
695 		adev->gfx.rlc_fw = NULL;
696 		release_firmware(adev->gfx.mec_fw);
697 		adev->gfx.mec_fw = NULL;
698 	}
699 
700 	return err;
701 }
702 
703 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
704 {
705 	const struct psp_firmware_header_v1_0 *toc_hdr;
706 	int err = 0;
707 	char fw_name[40];
708 	char ucode_prefix[30];
709 
710 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
711 
712 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
713 	err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
714 	if (err)
715 		goto out;
716 
717 	err = amdgpu_ucode_validate(adev->psp.toc_fw);
718 	if (err)
719 		goto out;
720 
721 	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
722 	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
723 	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
724 	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
725 	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
726 				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
727 	return 0;
728 out:
729 	dev_err(adev->dev, "Failed to load TOC microcode\n");
730 	release_firmware(adev->psp.toc_fw);
731 	adev->psp.toc_fw = NULL;
732 	return err;
733 }
734 
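/*
 * Size of the clear-state indirect buffer, in dwords.  This mirrors the
 * layout written by gfx_v11_0_get_csb_buffer() below: preamble begin (2) +
 * context control (3) + one SET_CONTEXT_REG per extent (2 + reg_count) +
 * PA_SC_TILE_STEERING_OVERRIDE (3) + preamble end (2) + clear state (2).
 */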
735 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
736 {
737 	u32 count = 0;
738 	const struct cs_section_def *sect = NULL;
739 	const struct cs_extent_def *ext = NULL;
740 
741 	/* begin clear state */
742 	count += 2;
743 	/* context control state */
744 	count += 3;
745 
746 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
747 		for (ext = sect->section; ext->extent != NULL; ++ext) {
748 			if (sect->id == SECT_CONTEXT)
749 				count += 2 + ext->reg_count;
750 			else
751 				return 0;
752 		}
753 	}
754 
755 	/* set PA_SC_TILE_STEERING_OVERRIDE */
756 	count += 3;
757 	/* end clear state */
758 	count += 2;
759 	/* clear state */
760 	count += 2;
761 
762 	return count;
763 }
764 
765 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
766 				    volatile u32 *buffer)
767 {
768 	u32 count = 0, i;
769 	const struct cs_section_def *sect = NULL;
770 	const struct cs_extent_def *ext = NULL;
771 	int ctx_reg_offset;
772 
773 	if (adev->gfx.rlc.cs_data == NULL)
774 		return;
775 	if (buffer == NULL)
776 		return;
777 
778 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
779 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
780 
781 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
782 	buffer[count++] = cpu_to_le32(0x80000000);
783 	buffer[count++] = cpu_to_le32(0x80000000);
784 
785 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
786 		for (ext = sect->section; ext->extent != NULL; ++ext) {
787 			if (sect->id == SECT_CONTEXT) {
788 				buffer[count++] =
789 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
790 				buffer[count++] = cpu_to_le32(ext->reg_index -
791 						PACKET3_SET_CONTEXT_REG_START);
792 				for (i = 0; i < ext->reg_count; i++)
793 					buffer[count++] = cpu_to_le32(ext->extent[i]);
794 			} else {
795 				return;
796 			}
797 		}
798 	}
799 
800 	ctx_reg_offset =
801 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
802 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
803 	buffer[count++] = cpu_to_le32(ctx_reg_offset);
804 	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
805 
806 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
807 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
808 
809 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
810 	buffer[count++] = cpu_to_le32(0);
811 }
812 
813 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
814 {
815 	/* clear state block */
816 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
817 			&adev->gfx.rlc.clear_state_gpu_addr,
818 			(void **)&adev->gfx.rlc.cs_ptr);
819 
820 	/* jump table block */
821 	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
822 			&adev->gfx.rlc.cp_table_gpu_addr,
823 			(void **)&adev->gfx.rlc.cp_table_ptr);
824 }
825 
826 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
827 {
828 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
829 
830 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
831 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
832 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
833 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
834 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
835 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
836 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
837 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
838 	adev->gfx.rlc.rlcg_reg_access_supported = true;
839 }
840 
841 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
842 {
843 	const struct cs_section_def *cs_data;
844 	int r;
845 
846 	adev->gfx.rlc.cs_data = gfx11_cs_data;
847 
848 	cs_data = adev->gfx.rlc.cs_data;
849 
850 	if (cs_data) {
851 		/* init clear state block */
852 		r = amdgpu_gfx_rlc_init_csb(adev);
853 		if (r)
854 			return r;
855 	}
856 
857 	/* init spm vmid with 0xf */
858 	if (adev->gfx.rlc.funcs->update_spm_vmid)
859 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
860 
861 	return 0;
862 }
863 
864 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
865 {
866 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
867 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
868 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
869 }
870 
871 static int gfx_v11_0_me_init(struct amdgpu_device *adev)
872 {
873 	int r;
874 
875 	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
876 
877 	amdgpu_gfx_graphics_queue_acquire(adev);
878 
879 	r = gfx_v11_0_init_microcode(adev);
880 	if (r)
881 		DRM_ERROR("Failed to load gfx firmware!\n");
882 
883 	return r;
884 }
885 
886 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
887 {
888 	int r;
889 	u32 *hpd;
890 	size_t mec_hpd_size;
891 
892 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
893 
894 	/* take ownership of the relevant compute queues */
895 	amdgpu_gfx_compute_queue_acquire(adev);
896 	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
897 
898 	if (mec_hpd_size) {
899 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
900 					      AMDGPU_GEM_DOMAIN_GTT,
901 					      &adev->gfx.mec.hpd_eop_obj,
902 					      &adev->gfx.mec.hpd_eop_gpu_addr,
903 					      (void **)&hpd);
904 		if (r) {
 905 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
906 			gfx_v11_0_mec_fini(adev);
907 			return r;
908 		}
909 
910 		memset(hpd, 0, mec_hpd_size);
911 
912 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
913 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
914 	}
915 
916 	return 0;
917 }
918 
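/*
 * Wave debug accessors: program SQ_IND_INDEX with the wave (and optionally
 * work-item) id plus a register index, then read the value(s) back through
 * SQ_IND_DATA.  Used by the read_wave_* callbacks below for debug tooling.
 */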
919 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
920 {
921 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
922 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
923 		(address << SQ_IND_INDEX__INDEX__SHIFT));
924 	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
925 }
926 
927 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
928 			   uint32_t thread, uint32_t regno,
929 			   uint32_t num, uint32_t *out)
930 {
931 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
932 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
933 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
934 		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
935 		(SQ_IND_INDEX__AUTO_INCR_MASK));
936 	while (num--)
937 		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
938 }
939 
940 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
941 {
942 	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
943 	 * field when performing a select_se_sh so it should be
944 	 * zero here */
945 	WARN_ON(simd != 0);
946 
947 	/* type 2 wave data */
948 	dst[(*no_fields)++] = 2;
949 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
950 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
951 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
952 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
953 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
954 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
955 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
956 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
957 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
958 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
959 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
960 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
961 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
962 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
963 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
964 }
965 
966 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
967 				     uint32_t wave, uint32_t start,
968 				     uint32_t size, uint32_t *dst)
969 {
970 	WARN_ON(simd != 0);
971 
972 	wave_read_regs(
973 		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
974 		dst);
975 }
976 
977 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
978 				      uint32_t wave, uint32_t thread,
979 				      uint32_t start, uint32_t size,
980 				      uint32_t *dst)
981 {
982 	wave_read_regs(
983 		adev, wave, thread,
984 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
985 }
986 
987 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
 988 				       u32 me, u32 pipe, u32 q, u32 vm)
989 {
990 	soc21_grbm_select(adev, me, pipe, q, vm);
991 }
992 
993 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
994 	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
995 	.select_se_sh = &gfx_v11_0_select_se_sh,
996 	.read_wave_data = &gfx_v11_0_read_wave_data,
997 	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
998 	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
999 	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
1000 	.init_spm_golden = &gfx_v11_0_init_spm_golden_registers,
1001 	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
1002 };
1003 
1004 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
1005 {
1006 	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
1007 
1008 	switch (adev->ip_versions[GC_HWIP][0]) {
1009 	case IP_VERSION(11, 0, 0):
1010 	case IP_VERSION(11, 0, 2):
1011 		adev->gfx.config.max_hw_contexts = 8;
1012 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1013 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1014 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1015 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1016 		break;
1017 	case IP_VERSION(11, 0, 1):
1018 		adev->gfx.config.max_hw_contexts = 8;
1019 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1020 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1021 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1022 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
1023 		break;
1024 	default:
1025 		BUG();
1026 		break;
1027 	}
1028 
1029 	return 0;
1030 }
1031 
1032 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
1033 				   int me, int pipe, int queue)
1034 {
1035 	int r;
1036 	struct amdgpu_ring *ring;
1037 	unsigned int irq_type;
1038 
1039 	ring = &adev->gfx.gfx_ring[ring_id];
1040 
1041 	ring->me = me;
1042 	ring->pipe = pipe;
1043 	ring->queue = queue;
1044 
1045 	ring->ring_obj = NULL;
1046 	ring->use_doorbell = true;
1047 
1048 	if (!ring_id)
1049 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1050 	else
1051 		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
1052 	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1053 
1054 	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
1055 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1056 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
1057 	if (r)
1058 		return r;
1059 	return 0;
1060 }
1061 
1062 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1063 				       int mec, int pipe, int queue)
1064 {
1065 	int r;
1066 	unsigned irq_type;
1067 	struct amdgpu_ring *ring;
1068 	unsigned int hw_prio;
1069 
1070 	ring = &adev->gfx.compute_ring[ring_id];
1071 
1072 	/* mec0 is me1 */
1073 	ring->me = mec + 1;
1074 	ring->pipe = pipe;
1075 	ring->queue = queue;
1076 
1077 	ring->ring_obj = NULL;
1078 	ring->use_doorbell = true;
1079 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1080 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1081 				+ (ring_id * GFX11_MEC_HPD_SIZE);
1082 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1083 
1084 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1085 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1086 		+ ring->pipe;
1087 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1088 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
1089 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1090 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1091 			     hw_prio, NULL);
1092 	if (r)
1093 		return r;
1094 
1095 	return 0;
1096 }
1097 
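/*
 * Per-firmware layout of the RLC autoload buffer, filled from the PSP TOC by
 * gfx_v11_0_parse_rlc_toc().  The TOC stores offsets and sizes in dwords;
 * they are converted to bytes here.
 */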
1098 static struct {
1099 	SOC21_FIRMWARE_ID	id;
1100 	unsigned int		offset;
1101 	unsigned int		size;
1102 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
1103 
1104 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
1105 {
1106 	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
1107 
1108 	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
1109 			(ucode->id < SOC21_FIRMWARE_ID_MAX)) {
1110 		rlc_autoload_info[ucode->id].id = ucode->id;
1111 		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
1112 		rlc_autoload_info[ucode->id].size = ucode->size * 4;
1113 
1114 		ucode++;
1115 	}
1116 }
1117 
1118 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
1119 {
1120 	uint32_t total_size = 0;
1121 	SOC21_FIRMWARE_ID id;
1122 
1123 	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
1124 
1125 	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
1126 		total_size += rlc_autoload_info[id].size;
1127 
 1128 	/* the offsets in the rlc toc may be aligned, so summing the sizes can undershoot */
1129 	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
1130 		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
1131 			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
1132 
1133 	return total_size;
1134 }
1135 
1136 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1137 {
1138 	int r;
1139 	uint32_t total_size;
1140 
1141 	total_size = gfx_v11_0_calc_toc_total_size(adev);
1142 
1143 	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1144 			AMDGPU_GEM_DOMAIN_VRAM,
1145 			&adev->gfx.rlc.rlc_autoload_bo,
1146 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
1147 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
1148 
1149 	if (r) {
1150 		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1151 		return r;
1152 	}
1153 
1154 	return 0;
1155 }
1156 
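/*
 * Copy one firmware image into its slot in the autoload bo: place it at the
 * TOC offset for @id, zero-pad up to the TOC size, and flag it in the 64-bit
 * autoload mask (the RS64 PFP/ME instruction images are excluded from the
 * mask).
 */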
1157 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1158 					      SOC21_FIRMWARE_ID id,
 1159 					      const void *fw_data,
1160 					      uint32_t fw_size,
1161 					      uint32_t *fw_autoload_mask)
1162 {
1163 	uint32_t toc_offset;
1164 	uint32_t toc_fw_size;
1165 	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1166 
1167 	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1168 		return;
1169 
1170 	toc_offset = rlc_autoload_info[id].offset;
1171 	toc_fw_size = rlc_autoload_info[id].size;
1172 
1173 	if (fw_size == 0)
1174 		fw_size = toc_fw_size;
1175 
1176 	if (fw_size > toc_fw_size)
1177 		fw_size = toc_fw_size;
1178 
1179 	memcpy(ptr + toc_offset, fw_data, fw_size);
1180 
1181 	if (fw_size < toc_fw_size)
1182 		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1183 
1184 	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1185 		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
1186 }
1187 
1188 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1189 							uint32_t *fw_autoload_mask)
1190 {
1191 	void *data;
1192 	uint32_t size;
1193 	uint64_t *toc_ptr;
1194 
1195 	*(uint64_t *)fw_autoload_mask |= 0x1;
1196 
1197 	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1198 
1199 	data = adev->psp.toc.start_addr;
1200 	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1201 
1202 	toc_ptr = (uint64_t *)data + size / 8 - 1;
1203 	*toc_ptr = *(uint64_t *)fw_autoload_mask;
1204 
1205 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1206 					data, size, fw_autoload_mask);
1207 }
1208 
1209 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1210 							uint32_t *fw_autoload_mask)
1211 {
1212 	const __le32 *fw_data;
1213 	uint32_t fw_size;
1214 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1215 	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1216 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1217 	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1218 	uint16_t version_major, version_minor;
1219 
1220 	if (adev->gfx.rs64_enable) {
1221 		/* pfp ucode */
1222 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1223 			adev->gfx.pfp_fw->data;
1224 		/* instruction */
1225 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1226 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1227 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1228 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1229 						fw_data, fw_size, fw_autoload_mask);
1230 		/* data */
1231 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1232 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1233 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1234 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1235 						fw_data, fw_size, fw_autoload_mask);
1236 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1237 						fw_data, fw_size, fw_autoload_mask);
1238 		/* me ucode */
1239 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1240 			adev->gfx.me_fw->data;
1241 		/* instruction */
1242 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1243 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1244 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1245 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1246 						fw_data, fw_size, fw_autoload_mask);
1247 		/* data */
1248 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1249 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1250 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1251 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1252 						fw_data, fw_size, fw_autoload_mask);
1253 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1254 						fw_data, fw_size, fw_autoload_mask);
1255 		/* mec ucode */
1256 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1257 			adev->gfx.mec_fw->data;
1258 		/* instruction */
1259 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1260 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1261 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1262 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1263 						fw_data, fw_size, fw_autoload_mask);
1264 		/* data */
1265 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1266 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1267 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1268 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1269 						fw_data, fw_size, fw_autoload_mask);
1270 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1271 						fw_data, fw_size, fw_autoload_mask);
1272 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1273 						fw_data, fw_size, fw_autoload_mask);
1274 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1275 						fw_data, fw_size, fw_autoload_mask);
1276 	} else {
1277 		/* pfp ucode */
1278 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1279 			adev->gfx.pfp_fw->data;
1280 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1281 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1282 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1283 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1284 						fw_data, fw_size, fw_autoload_mask);
1285 
1286 		/* me ucode */
1287 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1288 			adev->gfx.me_fw->data;
1289 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1290 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1291 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1292 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1293 						fw_data, fw_size, fw_autoload_mask);
1294 
1295 		/* mec ucode */
1296 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1297 			adev->gfx.mec_fw->data;
1298 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1299 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1300 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1301 			cp_hdr->jt_size * 4;
1302 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1303 						fw_data, fw_size, fw_autoload_mask);
1304 	}
1305 
1306 	/* rlc ucode */
1307 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1308 		adev->gfx.rlc_fw->data;
1309 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1310 			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1311 	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1312 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1313 					fw_data, fw_size, fw_autoload_mask);
1314 
1315 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1316 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1317 	if (version_major == 2) {
1318 		if (version_minor >= 2) {
1319 			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1320 
1321 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1322 					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1323 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1324 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1325 					fw_data, fw_size, fw_autoload_mask);
1326 
1327 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1328 					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1329 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1330 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1331 					fw_data, fw_size, fw_autoload_mask);
1332 		}
1333 	}
1334 }
1335 
1336 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1337 							uint32_t *fw_autoload_mask)
1338 {
1339 	const __le32 *fw_data;
1340 	uint32_t fw_size;
1341 	const struct sdma_firmware_header_v2_0 *sdma_hdr;
1342 
1343 	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1344 		adev->sdma.instance[0].fw->data;
1345 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1346 			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1347 	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1348 
1349 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1350 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1351 
1352 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1353 			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1354 	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1355 
1356 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1357 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1358 }
1359 
1360 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1361 							uint32_t *fw_autoload_mask)
1362 {
1363 	const __le32 *fw_data;
1364 	unsigned fw_size;
1365 	const struct mes_firmware_header_v1_0 *mes_hdr;
1366 	int pipe, ucode_id, data_id;
1367 
1368 	for (pipe = 0; pipe < 2; pipe++) {
 1369 		if (pipe == 0) {
1370 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1371 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1372 		} else {
1373 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1374 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1375 		}
1376 
1377 		mes_hdr = (const struct mes_firmware_header_v1_0 *)
1378 			adev->mes.fw[pipe]->data;
1379 
1380 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1381 				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1382 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1383 
1384 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1385 				ucode_id, fw_data, fw_size, fw_autoload_mask);
1386 
1387 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1388 				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1389 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1390 
1391 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1392 				data_id, fw_data, fw_size, fw_autoload_mask);
1393 	}
1394 }
1395 
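/*
 * RLC backdoor autoload: copy the SDMA, CP/RLC, MES and TOC images into the
 * autoload bo, point the IMU bootloader registers at the RLC_G ucode inside
 * it, then load/setup/start the IMU firmware and disable GPA mode.
 */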
1396 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1397 {
1398 	uint32_t rlc_g_offset, rlc_g_size;
1399 	uint64_t gpu_addr;
1400 	uint32_t autoload_fw_id[2];
1401 
1402 	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
1403 
1404 	/* RLC autoload sequence 2: copy ucode */
1405 	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1406 	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1407 	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1408 	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1409 
1410 	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1411 	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1412 	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1413 
1414 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1415 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1416 
1417 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1418 
1419 	/* RLC autoload sequence 3: load IMU fw */
1420 	if (adev->gfx.imu.funcs->load_microcode)
1421 		adev->gfx.imu.funcs->load_microcode(adev);
 1422 	/* RLC autoload sequence 4: init IMU fw */
1423 	if (adev->gfx.imu.funcs->setup_imu)
1424 		adev->gfx.imu.funcs->setup_imu(adev);
1425 	if (adev->gfx.imu.funcs->start_imu)
1426 		adev->gfx.imu.funcs->start_imu(adev);
1427 
 1428 	/* RLC autoload sequence 5: disable gpa mode */
1429 	gfx_v11_0_disable_gpa_mode(adev);
1430 
1431 	return 0;
1432 }
1433 
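/*
 * sw_init: pick the ME/MEC topology for the GC IP revision, hook up the EOP
 * and privileged fault interrupts, load microcode, create the RLC and MEC
 * bos, initialize the gfx and compute rings (plus the KIQ when MES KIQ is
 * not in use), allocate MQD backup memory, and, for RLC backdoor autoload,
 * fetch the TOC firmware and allocate the autoload bo.
 */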
1434 static int gfx_v11_0_sw_init(void *handle)
1435 {
1436 	int i, j, k, r, ring_id = 0;
1437 	struct amdgpu_kiq *kiq;
1438 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1439 
1440 	adev->gfxhub.funcs->init(adev);
1441 
1442 	switch (adev->ip_versions[GC_HWIP][0]) {
1443 	case IP_VERSION(11, 0, 0):
1444 	case IP_VERSION(11, 0, 1):
1445 	case IP_VERSION(11, 0, 2):
1446 		adev->gfx.me.num_me = 1;
1447 		adev->gfx.me.num_pipe_per_me = 1;
1448 		adev->gfx.me.num_queue_per_pipe = 1;
1449 		adev->gfx.mec.num_mec = 2;
1450 		adev->gfx.mec.num_pipe_per_mec = 4;
1451 		adev->gfx.mec.num_queue_per_pipe = 4;
1452 		break;
1453 	default:
1454 		adev->gfx.me.num_me = 1;
1455 		adev->gfx.me.num_pipe_per_me = 1;
1456 		adev->gfx.me.num_queue_per_pipe = 1;
1457 		adev->gfx.mec.num_mec = 1;
1458 		adev->gfx.mec.num_pipe_per_mec = 4;
1459 		adev->gfx.mec.num_queue_per_pipe = 8;
1460 		break;
1461 	}
1462 
1463 	/* EOP Event */
1464 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1465 			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1466 			      &adev->gfx.eop_irq);
1467 	if (r)
1468 		return r;
1469 
1470 	/* Privileged reg */
1471 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1472 			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1473 			      &adev->gfx.priv_reg_irq);
1474 	if (r)
1475 		return r;
1476 
1477 	/* Privileged inst */
1478 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1479 			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1480 			      &adev->gfx.priv_inst_irq);
1481 	if (r)
1482 		return r;
1483 
1484 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1485 
1486 	if (adev->gfx.imu.funcs) {
1487 		if (adev->gfx.imu.funcs->init_microcode) {
1488 			r = adev->gfx.imu.funcs->init_microcode(adev);
1489 			if (r)
1490 				DRM_ERROR("Failed to load imu firmware!\n");
1491 		}
1492 	}
1493 
1494 	r = gfx_v11_0_me_init(adev);
1495 	if (r)
1496 		return r;
1497 
1498 	r = gfx_v11_0_rlc_init(adev);
1499 	if (r) {
1500 		DRM_ERROR("Failed to init rlc BOs!\n");
1501 		return r;
1502 	}
1503 
1504 	r = gfx_v11_0_mec_init(adev);
1505 	if (r) {
1506 		DRM_ERROR("Failed to init MEC BOs!\n");
1507 		return r;
1508 	}
1509 
1510 	/* set up the gfx ring */
1511 	for (i = 0; i < adev->gfx.me.num_me; i++) {
1512 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1513 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1514 				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1515 					continue;
1516 
1517 				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1518 							    i, k, j);
1519 				if (r)
1520 					return r;
1521 				ring_id++;
1522 			}
1523 		}
1524 	}
1525 
1526 	ring_id = 0;
1527 	/* set up the compute queues - allocate horizontally across pipes */
1528 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1529 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1530 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1531 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
1532 								     j))
1533 					continue;
1534 
1535 				r = gfx_v11_0_compute_ring_init(adev, ring_id,
1536 								i, k, j);
1537 				if (r)
1538 					return r;
1539 
1540 				ring_id++;
1541 			}
1542 		}
1543 	}
1544 
1545 	if (!adev->enable_mes_kiq) {
1546 		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
1547 		if (r) {
1548 			DRM_ERROR("Failed to init KIQ BOs!\n");
1549 			return r;
1550 		}
1551 
1552 		kiq = &adev->gfx.kiq;
1553 		r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1554 		if (r)
1555 			return r;
1556 	}
1557 
1558 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
1559 	if (r)
1560 		return r;
1561 
1562 	/* allocate visible FB for rlc auto-loading fw */
1563 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1564 		r = gfx_v11_0_init_toc_microcode(adev);
1565 		if (r)
1566 			dev_err(adev->dev, "Failed to load toc firmware!\n");
1567 		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1568 		if (r)
1569 			return r;
1570 	}
1571 
1572 	r = gfx_v11_0_gpu_early_init(adev);
1573 	if (r)
1574 		return r;
1575 
1576 	return 0;
1577 }
1578 
1579 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1580 {
1581 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1582 			      &adev->gfx.pfp.pfp_fw_gpu_addr,
1583 			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
1584 
1585 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1586 			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1587 			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1588 }
1589 
1590 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1591 {
1592 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1593 			      &adev->gfx.me.me_fw_gpu_addr,
1594 			      (void **)&adev->gfx.me.me_fw_ptr);
1595 
1596 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1597 			       &adev->gfx.me.me_fw_data_gpu_addr,
1598 			       (void **)&adev->gfx.me.me_fw_data_ptr);
1599 }
1600 
1601 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1602 {
1603 	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1604 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
1605 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
1606 }
1607 
1608 static int gfx_v11_0_sw_fini(void *handle)
1609 {
1610 	int i;
1611 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1612 
1613 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1614 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1615 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1616 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1617 
1618 	amdgpu_gfx_mqd_sw_fini(adev);
1619 
1620 	if (!adev->enable_mes_kiq) {
1621 		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
1622 		amdgpu_gfx_kiq_fini(adev);
1623 	}
1624 
1625 	gfx_v11_0_pfp_fini(adev);
1626 	gfx_v11_0_me_fini(adev);
1627 	gfx_v11_0_rlc_fini(adev);
1628 	gfx_v11_0_mec_fini(adev);
1629 
1630 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1631 		gfx_v11_0_rlc_autoload_buffer_fini(adev);
1632 
1633 	gfx_v11_0_free_microcode(adev);
1634 
1635 	return 0;
1636 }
1637 
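/*
 * Select which SE/SA/instance the following GRBM-indexed register accesses
 * target. Passing 0xffffffff for a field enables broadcast writes to all
 * units of that type; callers such as gfx_v11_0_setup_rb() serialize this
 * with grbm_idx_mutex.
 */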
1638 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1639 				   u32 sh_num, u32 instance)
1640 {
1641 	u32 data;
1642 
1643 	if (instance == 0xffffffff)
1644 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1645 				     INSTANCE_BROADCAST_WRITES, 1);
1646 	else
1647 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1648 				     instance);
1649 
1650 	if (se_num == 0xffffffff)
1651 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1652 				     1);
1653 	else
1654 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1655 
1656 	if (sh_num == 0xffffffff)
1657 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1658 				     1);
1659 	else
1660 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1661 
1662 	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1663 }
1664 
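/*
 * Return a bitmap of the render backends that are still enabled for the
 * currently selected SE/SA, based on CC_RB_BACKEND_DISABLE and
 * GC_USER_RB_BACKEND_DISABLE.
 */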
1665 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1666 {
1667 	u32 data, mask;
1668 
1669 	data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1670 	data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1671 
1672 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1673 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1674 
1675 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1676 					 adev->gfx.config.max_sh_per_se);
1677 
1678 	return (~data) & mask;
1679 }
1680 
1681 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1682 {
1683 	int i, j;
1684 	u32 data;
1685 	u32 active_rbs = 0;
1686 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1687 					adev->gfx.config.max_sh_per_se;
1688 
1689 	mutex_lock(&adev->grbm_idx_mutex);
1690 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1691 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1692 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
1693 			data = gfx_v11_0_get_rb_active_bitmap(adev);
1694 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1695 					       rb_bitmap_width_per_sh);
1696 		}
1697 	}
1698 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1699 	mutex_unlock(&adev->grbm_idx_mutex);
1700 
1701 	adev->gfx.config.backend_enable_mask = active_rbs;
1702 	adev->gfx.config.num_rbs = hweight32(active_rbs);
1703 }
1704 
1705 #define DEFAULT_SH_MEM_BASES	(0x6000)
1706 #define LDS_APP_BASE           0x1
1707 #define SCRATCH_APP_BASE       0x2
1708 
1709 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1710 {
1711 	int i;
1712 	uint32_t sh_mem_bases;
1713 	uint32_t data;
1714 
1715 	/*
1716 	 * Configure apertures:
1717 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1718 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1719 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1720 	 */
1721 	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1722 			SCRATCH_APP_BASE;
1723 
1724 	mutex_lock(&adev->srbm_mutex);
1725 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1726 		soc21_grbm_select(adev, 0, 0, 0, i);
1727 		/* CP and shaders */
1728 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1729 		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1730 
1731 		/* Enable trap for each kfd vmid. */
1732 		data = RREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL));
1733 		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
		/* Write the updated value back, otherwise the trap enable is lost. */
		WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL), data);
1734 	}
1735 	soc21_grbm_select(adev, 0, 0, 0, 0);
1736 	mutex_unlock(&adev->srbm_mutex);
1737 
1738 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
1739 	   access. These should be enabled by FW for target VMIDs. */
1740 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1741 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1742 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1743 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1744 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1745 	}
1746 }
1747 
1748 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1749 {
1750 	int vmid;
1751 
1752 	/*
1753 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1754 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
1755 	 * the driver can enable them for graphics. VMID0 should maintain
1756 	 * access so that HWS firmware can save/restore entries.
1757 	 */
1758 	for (vmid = 1; vmid < 16; vmid++) {
1759 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1760 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1761 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1762 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1763 	}
1764 }
1765 
1766 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1767 {
1768 	/* TODO: harvest feature to be added later. */
1769 }
1770 
1771 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1772 {
1773 	/* TCCs are global (not instanced). */
1774 	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1775 			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1776 
1777 	adev->gfx.config.tcc_disabled_mask =
1778 		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
1779 		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
1780 }
1781 
1782 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
1783 {
1784 	u32 tmp;
1785 	int i;
1786 
1787 	WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1788 
1789 	gfx_v11_0_setup_rb(adev);
1790 	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1791 	gfx_v11_0_get_tcc_info(adev);
1792 	adev->gfx.config.pa_sc_tile_steering_override = 0;
1793 
1794 	/* XXX SH_MEM regs */
1795 	/* where to put LDS, scratch, GPUVM in FSA64 space */
1796 	mutex_lock(&adev->srbm_mutex);
1797 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
1798 		soc21_grbm_select(adev, 0, 0, 0, i);
1799 		/* CP and shaders */
1800 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1801 		if (i != 0) {
1802 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1803 				(adev->gmc.private_aperture_start >> 48));
1804 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1805 				(adev->gmc.shared_aperture_start >> 48));
1806 			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1807 		}
1808 	}
1809 	soc21_grbm_select(adev, 0, 0, 0, 0);
1810 
1811 	mutex_unlock(&adev->srbm_mutex);
1812 
1813 	gfx_v11_0_init_compute_vmid(adev);
1814 	gfx_v11_0_init_gds_vmid(adev);
1815 }
1816 
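/*
 * Toggle the context busy/empty, compute busy and gfx idle interrupt
 * enables in CP_INT_CNTL_RING0. Not programmed under SR-IOV.
 */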
1817 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1818 					       bool enable)
1819 {
1820 	u32 tmp;
1821 
1822 	if (amdgpu_sriov_vf(adev))
1823 		return;
1824 
1825 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1826 
1827 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1828 			    enable ? 1 : 0);
1829 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1830 			    enable ? 1 : 0);
1831 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1832 			    enable ? 1 : 0);
1833 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1834 			    enable ? 1 : 0);
1835 
1836 	WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1837 }
1838 
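/*
 * Program the clear state indirect buffer address and length so the RLC
 * can fetch the clear state generated by get_csb_buffer().
 */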
1839 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
1840 {
1841 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1842 
1843 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1844 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
1845 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1846 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1847 	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1848 
1849 	return 0;
1850 }
1851 
1852 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
1853 {
1854 	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1855 
1856 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1857 	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1858 }
1859 
1860 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
1861 {
1862 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1863 	udelay(50);
1864 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1865 	udelay(50);
1866 }
1867 
1868 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1869 					     bool enable)
1870 {
1871 	uint32_t rlc_pg_cntl;
1872 
1873 	rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1874 
1875 	if (!enable) {
1876 		/* RLC_PG_CNTL[23] = 0 (default)
1877 		 * RLC will wait for handshake acks with SMU
1878 		 * GFXOFF will be enabled
1879 		 * RLC_PG_CNTL[23] = 1
1880 		 * RLC will not issue any message to SMU
1881 		 * hence no handshake between SMU & RLC
1882 		 * GFXOFF will be disabled
1883 		 */
1884 		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1885 	} else
1886 		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1887 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1888 }
1889 
1890 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
1891 {
1892 	/* TODO: enable the rlc & smu handshake once the smu
1893 	 * and gfxoff features work as expected */
1894 	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1895 		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
1896 
1897 	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1898 	udelay(50);
1899 }
1900 
1901 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
1902 {
1903 	uint32_t tmp;
1904 
1905 	/* enable Save Restore Machine */
1906 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1907 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1908 	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1909 	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1910 }
1911 
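/*
 * Legacy direct RLCG load: stream the ucode dwords into RLC_GPM_UCODE_DATA
 * starting at RLCG_UCODE_LOADING_START_ADDRESS, then write the firmware
 * version to the address register.
 */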
1912 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
1913 {
1914 	const struct rlc_firmware_header_v2_0 *hdr;
1915 	const __le32 *fw_data;
1916 	unsigned i, fw_size;
1917 
1918 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1919 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1920 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1921 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1922 
1923 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1924 		     RLCG_UCODE_LOADING_START_ADDRESS);
1925 
1926 	for (i = 0; i < fw_size; i++)
1927 		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1928 			     le32_to_cpup(fw_data++));
1929 
1930 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1931 }
1932 
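/*
 * Load the RLC LX6 IRAM and DRAM images described by the v2.2 rlc firmware
 * header, then release the LX6 core from reset (BRESET = 0) with PDEBUG
 * enabled.
 */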
1933 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1934 {
1935 	const struct rlc_firmware_header_v2_2 *hdr;
1936 	const __le32 *fw_data;
1937 	unsigned i, fw_size;
1938 	u32 tmp;
1939 
1940 	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1941 
1942 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1943 			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1944 	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1945 
1946 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1947 
1948 	for (i = 0; i < fw_size; i++) {
1949 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1950 			msleep(1);
1951 		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1952 				le32_to_cpup(fw_data++));
1953 	}
1954 
1955 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1956 
1957 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1958 			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1959 	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1960 
1961 	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1962 	for (i = 0; i < fw_size; i++) {
1963 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1964 			msleep(1);
1965 		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1966 				le32_to_cpup(fw_data++));
1967 	}
1968 
1969 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1970 
1971 	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1972 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1973 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1974 	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1975 }
1976 
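/*
 * Load the RLCP image through the RLC_PACE_UCODE registers and the RLCV
 * image through the RLC_GPU_IOV_UCODE registers (v2.3 rlc firmware
 * header), then enable GPM thread 1 and the RLC_GPU_IOV F32 engine.
 */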
1977 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
1978 {
1979 	const struct rlc_firmware_header_v2_3 *hdr;
1980 	const __le32 *fw_data;
1981 	unsigned i, fw_size;
1982 	u32 tmp;
1983 
1984 	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
1985 
1986 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1987 			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
1988 	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
1989 
1990 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
1991 
1992 	for (i = 0; i < fw_size; i++) {
1993 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1994 			msleep(1);
1995 		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
1996 				le32_to_cpup(fw_data++));
1997 	}
1998 
1999 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
2000 
2001 	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
2002 	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
2003 	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
2004 
2005 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2006 			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
2007 	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
2008 
2009 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
2010 
2011 	for (i = 0; i < fw_size; i++) {
2012 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2013 			msleep(1);
2014 		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
2015 				le32_to_cpup(fw_data++));
2016 	}
2017 
2018 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
2019 
2020 	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
2021 	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
2022 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
2023 }
2024 
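/*
 * Direct RLC firmware load, dispatched on the rlc header version: v2.0
 * always loads RLCG; when dpm is enabled, minor >= 2 also loads the LX6
 * IRAM/DRAM images and minor == 3 the RLCP/RLCV images.
 */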
2025 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
2026 {
2027 	const struct rlc_firmware_header_v2_0 *hdr;
2028 	uint16_t version_major;
2029 	uint16_t version_minor;
2030 
2031 	if (!adev->gfx.rlc_fw)
2032 		return -EINVAL;
2033 
2034 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2035 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2036 
2037 	version_major = le16_to_cpu(hdr->header.header_version_major);
2038 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
2039 
2040 	if (version_major == 2) {
2041 		gfx_v11_0_load_rlcg_microcode(adev);
2042 		if (amdgpu_dpm == 1) {
2043 			if (version_minor >= 2)
2044 				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
2045 			if (version_minor == 3)
2046 				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
2047 		}
2048 
2049 		return 0;
2050 	}
2051 
2052 	return -EINVAL;
2053 }
2054 
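/*
 * Bring the RLC up. With PSP front-door loading only the CSB (and SRM on
 * bare metal) needs to be programmed; otherwise stop the RLC, clear CG/PG,
 * optionally do the legacy direct microcode load, program the CSB and
 * restart the RLC.
 */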
2055 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
2056 {
2057 	int r;
2058 
2059 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
2060 		gfx_v11_0_init_csb(adev);
2061 
2062 		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
2063 			gfx_v11_0_rlc_enable_srm(adev);
2064 	} else {
2065 		if (amdgpu_sriov_vf(adev)) {
2066 			gfx_v11_0_init_csb(adev);
2067 			return 0;
2068 		}
2069 
2070 		adev->gfx.rlc.funcs->stop(adev);
2071 
2072 		/* disable CG */
2073 		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
2074 
2075 		/* disable PG */
2076 		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
2077 
2078 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2079 			/* legacy rlc firmware loading */
2080 			r = gfx_v11_0_rlc_load_microcode(adev);
2081 			if (r)
2082 				return r;
2083 		}
2084 
2085 		gfx_v11_0_init_csb(adev);
2086 
2087 		adev->gfx.rlc.funcs->start(adev);
2088 	}
2089 	return 0;
2090 }
2091 
2092 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
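/*
 * gfx_v11_0_config_{me,pfp,mec}_cache() set up the instruction caches for
 * the non-RS64 CP firmwares: trigger and wait for an L1 I$ invalidation,
 * program the IC_BASE_CNTL attributes, then point IC_BASE at the ucode
 * copy (the autoload buffer or the driver-allocated firmware BO, depending
 * on the caller).
 */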
2093 {
2094 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2095 	uint32_t tmp;
2096 	int i;
2097 
2098 	/* Trigger an invalidation of the L1 instruction caches */
2099 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2100 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2101 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2102 
2103 	/* Wait for invalidation complete */
2104 	for (i = 0; i < usec_timeout; i++) {
2105 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2106 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2107 					INVALIDATE_CACHE_COMPLETE))
2108 			break;
2109 		udelay(1);
2110 	}
2111 
2112 	if (i >= usec_timeout) {
2113 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2114 		return -EINVAL;
2115 	}
2116 
2117 	if (amdgpu_emu_mode == 1)
2118 		adev->hdp.funcs->flush_hdp(adev, NULL);
2119 
2120 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2121 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2122 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2123 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2124 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2125 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2126 
2127 	/* Program me ucode address into instruction cache address register */
2128 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2129 			lower_32_bits(addr) & 0xFFFFF000);
2130 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2131 			upper_32_bits(addr));
2132 
2133 	return 0;
2134 }
2135 
2136 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
2137 {
2138 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2139 	uint32_t tmp;
2140 	int i;
2141 
2142 	/* Trigger an invalidation of the L1 instruction caches */
2143 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2144 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2145 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2146 
2147 	/* Wait for invalidation complete */
2148 	for (i = 0; i < usec_timeout; i++) {
2149 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2150 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2151 					INVALIDATE_CACHE_COMPLETE))
2152 			break;
2153 		udelay(1);
2154 	}
2155 
2156 	if (i >= usec_timeout) {
2157 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2158 		return -EINVAL;
2159 	}
2160 
2161 	if (amdgpu_emu_mode == 1)
2162 		adev->hdp.funcs->flush_hdp(adev, NULL);
2163 
2164 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2165 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2166 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2167 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2168 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2169 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2170 
2171 	/* Program pfp ucode address into instruction cache address register */
2172 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2173 			lower_32_bits(addr) & 0xFFFFF000);
2174 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2175 			upper_32_bits(addr));
2176 
2177 	return 0;
2178 }
2179 
2180 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2181 {
2182 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2183 	uint32_t tmp;
2184 	int i;
2185 
2186 	/* Trigger an invalidation of the L1 instruction caches */
2187 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2188 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2189 
2190 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2191 
2192 	/* Wait for invalidation complete */
2193 	for (i = 0; i < usec_timeout; i++) {
2194 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2195 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2196 					INVALIDATE_CACHE_COMPLETE))
2197 			break;
2198 		udelay(1);
2199 	}
2200 
2201 	if (i >= usec_timeout) {
2202 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2203 		return -EINVAL;
2204 	}
2205 
2206 	if (amdgpu_emu_mode == 1)
2207 		adev->hdp.funcs->flush_hdp(adev, NULL);
2208 
2209 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2210 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2211 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2212 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2213 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2214 
2215 	/* Program mec1 ucode address into instruction cache address register */
2216 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2217 			lower_32_bits(addr) & 0xFFFFF000);
2218 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2219 			upper_32_bits(addr));
2220 
2221 	return 0;
2222 }
2223 
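/*
 * RS64 variant of the cache setup used on the autoload path: program the
 * PFP instruction cache base, wait for the implicit invalidation, prime
 * the I$, set the per-pipe program counter start from the ucode header,
 * pulse the pipe reset, point the per-pipe data cache base at the stack
 * image (addr2) and finally invalidate the data cache. The ME and MEC
 * helpers below follow the same pattern with their own registers.
 */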
2224 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2225 {
2226 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2227 	uint32_t tmp;
2228 	unsigned i, pipe_id;
2229 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2230 
2231 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2232 		adev->gfx.pfp_fw->data;
2233 
2234 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2235 		lower_32_bits(addr));
2236 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2237 		upper_32_bits(addr));
2238 
2239 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2240 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2241 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2242 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2243 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2244 
2245 	/*
2246 	 * Programming any of the CP_PFP_IC_BASE registers
2247 	 * forces an invalidation of the PFP L1 I$. Wait for the
2248 	 * invalidation to complete.
2249 	 */
2250 	for (i = 0; i < usec_timeout; i++) {
2251 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2252 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2253 			INVALIDATE_CACHE_COMPLETE))
2254 			break;
2255 		udelay(1);
2256 	}
2257 
2258 	if (i >= usec_timeout) {
2259 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2260 		return -EINVAL;
2261 	}
2262 
2263 	/* Prime the L1 instruction caches */
2264 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2265 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2266 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2267 	/* Wait for the cache to be primed */
2268 	for (i = 0; i < usec_timeout; i++) {
2269 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2270 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2271 			ICACHE_PRIMED))
2272 			break;
2273 		udelay(1);
2274 	}
2275 
2276 	if (i >= usec_timeout) {
2277 		dev_err(adev->dev, "failed to prime instruction cache\n");
2278 		return -EINVAL;
2279 	}
2280 
2281 	mutex_lock(&adev->srbm_mutex);
2282 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2283 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2284 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2285 			(pfp_hdr->ucode_start_addr_hi << 30) |
2286 			(pfp_hdr->ucode_start_addr_lo >> 2));
2287 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2288 			pfp_hdr->ucode_start_addr_hi >> 2);
2289 
2290 		/*
2291 		 * Program CP_ME_CNTL to reset the given pipe so that
2292 		 * CP_PFP_PRGRM_CNTR_START takes effect.
2293 		 */
2294 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2295 		if (pipe_id == 0)
2296 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2297 					PFP_PIPE0_RESET, 1);
2298 		else
2299 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2300 					PFP_PIPE1_RESET, 1);
2301 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2302 
2303 		/* Clear the pfp pipe reset bit. */
2304 		if (pipe_id == 0)
2305 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2306 					PFP_PIPE0_RESET, 0);
2307 		else
2308 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2309 					PFP_PIPE1_RESET, 0);
2310 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2311 
2312 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2313 			lower_32_bits(addr2));
2314 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2315 			upper_32_bits(addr2));
2316 	}
2317 	soc21_grbm_select(adev, 0, 0, 0, 0);
2318 	mutex_unlock(&adev->srbm_mutex);
2319 
2320 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2321 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2322 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2323 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2324 
2325 	/* Invalidate the data caches */
2326 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2327 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2328 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2329 
2330 	for (i = 0; i < usec_timeout; i++) {
2331 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2332 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2333 			INVALIDATE_DCACHE_COMPLETE))
2334 			break;
2335 		udelay(1);
2336 	}
2337 
2338 	if (i >= usec_timeout) {
2339 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2340 		return -EINVAL;
2341 	}
2342 
2343 	return 0;
2344 }
2345 
2346 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2347 {
2348 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2349 	uint32_t tmp;
2350 	unsigned i, pipe_id;
2351 	const struct gfx_firmware_header_v2_0 *me_hdr;
2352 
2353 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2354 		adev->gfx.me_fw->data;
2355 
2356 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2357 		lower_32_bits(addr));
2358 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2359 		upper_32_bits(addr));
2360 
2361 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2362 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2363 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2364 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2365 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2366 
2367 	/*
2368 	 * Programming any of the CP_ME_IC_BASE registers
2369 	 * forces an invalidation of the ME L1 I$. Wait for the
2370 	 * invalidation to complete.
2371 	 */
2372 	for (i = 0; i < usec_timeout; i++) {
2373 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2374 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2375 			INVALIDATE_CACHE_COMPLETE))
2376 			break;
2377 		udelay(1);
2378 	}
2379 
2380 	if (i >= usec_timeout) {
2381 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2382 		return -EINVAL;
2383 	}
2384 
2385 	/* Prime the instruction caches */
2386 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2387 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2388 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2389 
2390 	/* Wait for the instruction cache to be primed */
2391 	for (i = 0; i < usec_timeout; i++) {
2392 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2393 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2394 			ICACHE_PRIMED))
2395 			break;
2396 		udelay(1);
2397 	}
2398 
2399 	if (i >= usec_timeout) {
2400 		dev_err(adev->dev, "failed to prime instruction cache\n");
2401 		return -EINVAL;
2402 	}
2403 
2404 	mutex_lock(&adev->srbm_mutex);
2405 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2406 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2407 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2408 			(me_hdr->ucode_start_addr_hi << 30) |
2409 			(me_hdr->ucode_start_addr_lo >> 2) );
2410 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2411 			me_hdr->ucode_start_addr_hi>>2);
2412 
2413 		/*
2414 		 * Program CP_ME_CNTL to reset the given pipe so that
2415 		 * CP_ME_PRGRM_CNTR_START takes effect.
2416 		 */
2417 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2418 		if (pipe_id == 0)
2419 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2420 					ME_PIPE0_RESET, 1);
2421 		else
2422 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2423 					ME_PIPE1_RESET, 1);
2424 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2425 
2426 		/* Clear the me pipe reset bit. */
2427 		if (pipe_id == 0)
2428 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2429 					ME_PIPE0_RESET, 0);
2430 		else
2431 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2432 					ME_PIPE1_RESET, 0);
2433 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2434 
2435 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2436 			lower_32_bits(addr2));
2437 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2438 			upper_32_bits(addr2));
2439 	}
2440 	soc21_grbm_select(adev, 0, 0, 0, 0);
2441 	mutex_unlock(&adev->srbm_mutex);
2442 
2443 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2444 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2445 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2446 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2447 
2448 	/* Invalidate the data caches */
2449 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2450 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2451 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2452 
2453 	for (i = 0; i < usec_timeout; i++) {
2454 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2455 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2456 			INVALIDATE_DCACHE_COMPLETE))
2457 			break;
2458 		udelay(1);
2459 	}
2460 
2461 	if (i >= usec_timeout) {
2462 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2463 		return -EINVAL;
2464 	}
2465 
2466 	return 0;
2467 }
2468 
2469 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2470 {
2471 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2472 	uint32_t tmp;
2473 	unsigned i;
2474 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2475 
2476 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2477 		adev->gfx.mec_fw->data;
2478 
2479 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2480 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2481 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2482 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2483 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2484 
2485 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2486 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2487 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2488 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2489 
2490 	mutex_lock(&adev->srbm_mutex);
2491 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2492 		soc21_grbm_select(adev, 1, i, 0, 0);
2493 
2494 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
2495 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2496 		     upper_32_bits(addr2));
2497 
2498 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2499 					mec_hdr->ucode_start_addr_lo >> 2 |
2500 					mec_hdr->ucode_start_addr_hi << 30);
2501 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2502 					mec_hdr->ucode_start_addr_hi >> 2);
2503 
2504 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
2505 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2506 		     upper_32_bits(addr));
2507 	}
2508 	soc21_grbm_select(adev, 0, 0, 0, 0);
2509 	mutex_unlock(&adev->srbm_mutex);
2510 
2511 	/* Trigger an invalidation of the MEC data caches */
2512 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2513 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2514 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2515 
2516 	/* Wait for invalidation complete */
2517 	for (i = 0; i < usec_timeout; i++) {
2518 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2519 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2520 				       INVALIDATE_DCACHE_COMPLETE))
2521 			break;
2522 		udelay(1);
2523 	}
2524 
2525 	if (i >= usec_timeout) {
2526 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2527 		return -EINVAL;
2528 	}
2529 
2530 	/* Trigger an invalidation of the L1 instruction caches */
2531 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2532 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2533 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2534 
2535 	/* Wait for invalidation complete */
2536 	for (i = 0; i < usec_timeout; i++) {
2537 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2538 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2539 				       INVALIDATE_CACHE_COMPLETE))
2540 			break;
2541 		udelay(1);
2542 	}
2543 
2544 	if (i >= usec_timeout) {
2545 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2546 		return -EINVAL;
2547 	}
2548 
2549 	return 0;
2550 }
2551 
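/*
 * Program the RS64 PFP/ME/MEC program counter start addresses from the
 * firmware headers and pulse the corresponding pipe resets so the new
 * start addresses take effect.
 */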
2552 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2553 {
2554 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2555 	const struct gfx_firmware_header_v2_0 *me_hdr;
2556 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2557 	uint32_t pipe_id, tmp;
2558 
2559 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2560 		adev->gfx.mec_fw->data;
2561 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2562 		adev->gfx.me_fw->data;
2563 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2564 		adev->gfx.pfp_fw->data;
2565 
2566 	/* config pfp program start addr */
2567 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2568 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2569 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2570 			(pfp_hdr->ucode_start_addr_hi << 30) |
2571 			(pfp_hdr->ucode_start_addr_lo >> 2));
2572 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2573 			pfp_hdr->ucode_start_addr_hi >> 2);
2574 	}
2575 	soc21_grbm_select(adev, 0, 0, 0, 0);
2576 
2577 	/* reset pfp pipe */
2578 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2579 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2580 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2581 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2582 
2583 	/* clear pfp pipe reset */
2584 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2585 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2586 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2587 
2588 	/* config me program start addr */
2589 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2590 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2591 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2592 			(me_hdr->ucode_start_addr_hi << 30) |
2593 			(me_hdr->ucode_start_addr_lo >> 2) );
2594 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2595 			me_hdr->ucode_start_addr_hi>>2);
2596 	}
2597 	soc21_grbm_select(adev, 0, 0, 0, 0);
2598 
2599 	/* reset me pipe */
2600 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2601 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2602 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2603 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2604 
2605 	/* clear me pipe reset */
2606 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2607 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2608 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2609 
2610 	/* config mec program start addr */
2611 	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2612 		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2613 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2614 					mec_hdr->ucode_start_addr_lo >> 2 |
2615 					mec_hdr->ucode_start_addr_hi << 30);
2616 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2617 					mec_hdr->ucode_start_addr_hi >> 2);
2618 	}
2619 	soc21_grbm_select(adev, 0, 0, 0, 0);
2620 }
2621 
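/*
 * Poll CP_STAT and the RLC bootload status register until the RLC reports
 * BOOTLOAD_COMPLETE, then, on the backdoor autoload path, point the CP
 * instruction/data caches at the ME/PFP/MEC images inside the autoload
 * buffer using the RS64 or legacy helpers as appropriate.
 */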
2622 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2623 {
2624 	uint32_t cp_status;
2625 	uint32_t bootload_status;
2626 	int i, r;
2627 	uint64_t addr, addr2;
2628 
2629 	for (i = 0; i < adev->usec_timeout; i++) {
2630 		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2631 
2632 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1))
2633 			bootload_status = RREG32_SOC15(GC, 0,
2634 					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2635 		else
2636 			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2637 
2638 		if ((cp_status == 0) &&
2639 		    (REG_GET_FIELD(bootload_status,
2640 			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2641 			break;
2642 		}
2643 		udelay(1);
2644 	}
2645 
2646 	if (i >= adev->usec_timeout) {
2647 		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2648 		return -ETIMEDOUT;
2649 	}
2650 
2651 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2652 		if (adev->gfx.rs64_enable) {
2653 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2654 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2655 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2656 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2657 			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2658 			if (r)
2659 				return r;
2660 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2661 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2662 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2663 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2664 			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2665 			if (r)
2666 				return r;
2667 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2668 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2669 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2670 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2671 			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2672 			if (r)
2673 				return r;
2674 		} else {
2675 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2676 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2677 			r = gfx_v11_0_config_me_cache(adev, addr);
2678 			if (r)
2679 				return r;
2680 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2681 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2682 			r = gfx_v11_0_config_pfp_cache(adev, addr);
2683 			if (r)
2684 				return r;
2685 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2686 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2687 			r = gfx_v11_0_config_mec_cache(adev, addr);
2688 			if (r)
2689 				return r;
2690 		}
2691 	}
2692 
2693 	return 0;
2694 }
2695 
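/*
 * Set or clear the ME/PFP halt bits in CP_ME_CNTL and wait for CP_STAT to
 * read back as idle.
 */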
2696 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2697 {
2698 	int i;
2699 	u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2700 
2701 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2702 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2703 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2704 
2705 	for (i = 0; i < adev->usec_timeout; i++) {
2706 		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2707 			break;
2708 		udelay(1);
2709 	}
2710 
2711 	if (i >= adev->usec_timeout)
2712 		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2713 
2714 	return 0;
2715 }
2716 
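/*
 * Legacy (non-RS64) direct PFP load: copy the ucode into a GTT BO,
 * configure the PFP instruction cache to fetch from it, then write the
 * jump table through the CP_HYP_PFP_UCODE registers.
 */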
2717 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2718 {
2719 	int r;
2720 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2721 	const __le32 *fw_data;
2722 	unsigned i, fw_size;
2723 
2724 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2725 		adev->gfx.pfp_fw->data;
2726 
2727 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2728 
2729 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2730 		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2731 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2732 
2733 	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2734 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2735 				      &adev->gfx.pfp.pfp_fw_obj,
2736 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
2737 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
2738 	if (r) {
2739 		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2740 		gfx_v11_0_pfp_fini(adev);
2741 		return r;
2742 	}
2743 
2744 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2745 
2746 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2747 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2748 
2749 	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2750 
2751 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
2752 
2753 	for (i = 0; i < pfp_hdr->jt_size; i++)
2754 		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
2755 			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
2756 
2757 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2758 
2759 	return 0;
2760 }
2761 
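/*
 * RS64 direct PFP load: the v2.0 header carries separate instruction and
 * data images, each copied into its own 64KB-aligned VRAM BO. The
 * instruction cache is invalidated and primed, the per-pipe program
 * counter start and data cache base are programmed, and the data cache is
 * invalidated. The ME RS64 loader below mirrors this sequence.
 */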
2762 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2763 {
2764 	int r;
2765 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2766 	const __le32 *fw_ucode, *fw_data;
2767 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2768 	uint32_t tmp;
2769 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2770 
2771 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2772 		adev->gfx.pfp_fw->data;
2773 
2774 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2775 
2776 	/* instruction */
2777 	fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2778 		le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2779 	fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2780 	/* data */
2781 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2782 		le32_to_cpu(pfp_hdr->data_offset_bytes));
2783 	fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2784 
2785 	/* 64kb align */
2786 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2787 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2788 				      &adev->gfx.pfp.pfp_fw_obj,
2789 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
2790 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
2791 	if (r) {
2792 		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2793 		gfx_v11_0_pfp_fini(adev);
2794 		return r;
2795 	}
2796 
2797 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
2798 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2799 				      &adev->gfx.pfp.pfp_fw_data_obj,
2800 				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2801 				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2802 	if (r) {
2803 		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2804 		gfx_v11_0_pfp_fini(adev);
2805 		return r;
2806 	}
2807 
2808 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2809 	memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2810 
2811 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2812 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2813 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2814 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2815 
2816 	if (amdgpu_emu_mode == 1)
2817 		adev->hdp.funcs->flush_hdp(adev, NULL);
2818 
2819 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2820 		lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2821 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2822 		upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2823 
2824 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2825 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2826 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2827 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2828 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2829 
2830 	/*
2831 	 * Programming any of the CP_PFP_IC_BASE registers
2832 	 * forces an invalidation of the PFP L1 I$. Wait for the
2833 	 * invalidation to complete.
2834 	 */
2835 	for (i = 0; i < usec_timeout; i++) {
2836 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2837 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2838 			INVALIDATE_CACHE_COMPLETE))
2839 			break;
2840 		udelay(1);
2841 	}
2842 
2843 	if (i >= usec_timeout) {
2844 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2845 		return -EINVAL;
2846 	}
2847 
2848 	/* Prime the L1 instruction caches */
2849 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2850 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2851 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2852 	/* Wait for the cache to be primed */
2853 	for (i = 0; i < usec_timeout; i++) {
2854 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2855 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2856 			ICACHE_PRIMED))
2857 			break;
2858 		udelay(1);
2859 	}
2860 
2861 	if (i >= usec_timeout) {
2862 		dev_err(adev->dev, "failed to prime instruction cache\n");
2863 		return -EINVAL;
2864 	}
2865 
2866 	mutex_lock(&adev->srbm_mutex);
2867 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2868 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2869 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2870 			(pfp_hdr->ucode_start_addr_hi << 30) |
2871 			(pfp_hdr->ucode_start_addr_lo >> 2) );
2872 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2873 			pfp_hdr->ucode_start_addr_hi>>2);
2874 
2875 		/*
2876 		 * Program CP_ME_CNTL to reset the given pipe so that
2877 		 * CP_PFP_PRGRM_CNTR_START takes effect.
2878 		 */
2879 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2880 		if (pipe_id == 0)
2881 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2882 					PFP_PIPE0_RESET, 1);
2883 		else
2884 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2885 					PFP_PIPE1_RESET, 1);
2886 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2887 
2888 		/* Clear the pfp pipe reset bit. */
2889 		if (pipe_id == 0)
2890 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2891 					PFP_PIPE0_RESET, 0);
2892 		else
2893 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2894 					PFP_PIPE1_RESET, 0);
2895 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2896 
2897 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2898 			lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2899 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2900 			upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2901 	}
2902 	soc21_grbm_select(adev, 0, 0, 0, 0);
2903 	mutex_unlock(&adev->srbm_mutex);
2904 
2905 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2906 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2907 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2908 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2909 
2910 	/* Invalidate the data caches */
2911 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2912 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2913 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2914 
2915 	for (i = 0; i < usec_timeout; i++) {
2916 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2917 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2918 			INVALIDATE_DCACHE_COMPLETE))
2919 			break;
2920 		udelay(1);
2921 	}
2922 
2923 	if (i >= usec_timeout) {
2924 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2925 		return -EINVAL;
2926 	}
2927 
2928 	return 0;
2929 }
2930 
2931 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
2932 {
2933 	int r;
2934 	const struct gfx_firmware_header_v1_0 *me_hdr;
2935 	const __le32 *fw_data;
2936 	unsigned i, fw_size;
2937 
2938 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2939 		adev->gfx.me_fw->data;
2940 
2941 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2942 
2943 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2944 		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2945 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
2946 
2947 	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
2948 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2949 				      &adev->gfx.me.me_fw_obj,
2950 				      &adev->gfx.me.me_fw_gpu_addr,
2951 				      (void **)&adev->gfx.me.me_fw_ptr);
2952 	if (r) {
2953 		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2954 		gfx_v11_0_me_fini(adev);
2955 		return r;
2956 	}
2957 
2958 	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
2959 
2960 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2961 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2962 
2963 	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
2964 
2965 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
2966 
2967 	for (i = 0; i < me_hdr->jt_size; i++)
2968 		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
2969 			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));
2970 
2971 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
2972 
2973 	return 0;
2974 }
2975 
2976 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2977 {
2978 	int r;
2979 	const struct gfx_firmware_header_v2_0 *me_hdr;
2980 	const __le32 *fw_ucode, *fw_data;
2981 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2982 	uint32_t tmp;
2983 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2984 
2985 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2986 		adev->gfx.me_fw->data;
2987 
2988 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2989 
2990 	/* instruction */
2991 	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2992 		le32_to_cpu(me_hdr->ucode_offset_bytes));
2993 	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2994 	/* data */
2995 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2996 		le32_to_cpu(me_hdr->data_offset_bytes));
2997 	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2998 
2999 	/* 64kb align */
3000 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3001 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3002 				      &adev->gfx.me.me_fw_obj,
3003 				      &adev->gfx.me.me_fw_gpu_addr,
3004 				      (void **)&adev->gfx.me.me_fw_ptr);
3005 	if (r) {
3006 		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
3007 		gfx_v11_0_me_fini(adev);
3008 		return r;
3009 	}
3010 
3011 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3012 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3013 				      &adev->gfx.me.me_fw_data_obj,
3014 				      &adev->gfx.me.me_fw_data_gpu_addr,
3015 				      (void **)&adev->gfx.me.me_fw_data_ptr);
3016 	if (r) {
3017 		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
3018 		gfx_v11_0_me_fini(adev);
3019 		return r;
3020 	}
3021 
3022 	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
3023 	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
3024 
3025 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3026 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
3027 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3028 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
3029 
3030 	if (amdgpu_emu_mode == 1)
3031 		adev->hdp.funcs->flush_hdp(adev, NULL);
3032 
3033 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
3034 		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
3035 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
3036 		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
3037 
3038 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
3039 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
3040 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
3041 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
3042 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
3043 
3044 	/*
3045 	 * Programming any of the CP_ME_IC_BASE registers
3046 	 * forces an invalidation of the ME L1 I$. Wait for the
3047 	 * invalidation to complete.
3048 	 */
3049 	for (i = 0; i < usec_timeout; i++) {
3050 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3051 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3052 			INVALIDATE_CACHE_COMPLETE))
3053 			break;
3054 		udelay(1);
3055 	}
3056 
3057 	if (i >= usec_timeout) {
3058 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3059 		return -EINVAL;
3060 	}
3061 
3062 	/* Prime the instruction caches */
3063 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3064 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
3065 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
3066 
3067 	/* Wait for the instruction cache to be primed */
3068 	for (i = 0; i < usec_timeout; i++) {
3069 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3070 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3071 			ICACHE_PRIMED))
3072 			break;
3073 		udelay(1);
3074 	}
3075 
3076 	if (i >= usec_timeout) {
3077 		dev_err(adev->dev, "failed to prime instruction cache\n");
3078 		return -EINVAL;
3079 	}
3080 
3081 	mutex_lock(&adev->srbm_mutex);
3082 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3083 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3084 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3085 			(me_hdr->ucode_start_addr_hi << 30) |
3086 			(me_hdr->ucode_start_addr_lo >> 2) );
3087 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3088 			me_hdr->ucode_start_addr_hi>>2);
3089 
3090 		/*
3091 		 * Program CP_ME_CNTL to reset the given pipe so that
3092 		 * CP_ME_PRGRM_CNTR_START takes effect.
3093 		 */
3094 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3095 		if (pipe_id == 0)
3096 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3097 					ME_PIPE0_RESET, 1);
3098 		else
3099 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3100 					ME_PIPE1_RESET, 1);
3101 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3102 
3103 		/* Clear the me pipe reset bit. */
3104 		if (pipe_id == 0)
3105 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3106 					ME_PIPE0_RESET, 0);
3107 		else
3108 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3109 					ME_PIPE1_RESET, 0);
3110 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3111 
3112 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
3113 			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3114 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
3115 			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3116 	}
3117 	soc21_grbm_select(adev, 0, 0, 0, 0);
3118 	mutex_unlock(&adev->srbm_mutex);
3119 
3120 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3121 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3122 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3123 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3124 
3125 	/* Invalidate the data caches */
3126 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3127 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3128 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3129 
3130 	for (i = 0; i < usec_timeout; i++) {
3131 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3132 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3133 			INVALIDATE_DCACHE_COMPLETE))
3134 			break;
3135 		udelay(1);
3136 	}
3137 
3138 	if (i >= usec_timeout) {
3139 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3140 		return -EINVAL;
3141 	}
3142 
3143 	return 0;
3144 }
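
/*
 * Note on the CP_ME_PRGRM_CNTR_START programming above: the start address
 * from the firmware header is byte based, while the register pair appears
 * to take a dword-granular value.  ucode_start_addr_lo >> 2 supplies the
 * low bits, ucode_start_addr_hi << 30 folds the next two bits into the
 * same register, and the remaining bits go to CP_ME_PRGRM_CNTR_START_HI
 * (ucode_start_addr_hi >> 2).  For example hi = 0x1, lo = 0x100 gives
 * START = (0x1 << 30) | (0x100 >> 2) = 0x40000040 and START_HI = 0.
 */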
3145 
3146 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3147 {
3148 	int r;
3149 
3150 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3151 		return -EINVAL;
3152 
3153 	gfx_v11_0_cp_gfx_enable(adev, false);
3154 
3155 	if (adev->gfx.rs64_enable)
3156 		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3157 	else
3158 		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3159 	if (r) {
3160 		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3161 		return r;
3162 	}
3163 
3164 	if (adev->gfx.rs64_enable)
3165 		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3166 	else
3167 		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3168 	if (r) {
3169 		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3170 		return r;
3171 	}
3172 
3173 	return 0;
3174 }
3175 
3176 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3177 {
3178 	struct amdgpu_ring *ring;
3179 	const struct cs_section_def *sect = NULL;
3180 	const struct cs_extent_def *ext = NULL;
3181 	int r, i;
3182 	int ctx_reg_offset;
3183 
3184 	/* init the CP */
3185 	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3186 		     adev->gfx.config.max_hw_contexts - 1);
3187 	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3188 
3189 	if (!amdgpu_async_gfx_ring)
3190 		gfx_v11_0_cp_gfx_enable(adev, true);
3191 
3192 	ring = &adev->gfx.gfx_ring[0];
3193 	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3194 	if (r) {
3195 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3196 		return r;
3197 	}
3198 
3199 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3200 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3201 
3202 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3203 	amdgpu_ring_write(ring, 0x80000000);
3204 	amdgpu_ring_write(ring, 0x80000000);
3205 
3206 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3207 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3208 			if (sect->id == SECT_CONTEXT) {
3209 				amdgpu_ring_write(ring,
3210 						  PACKET3(PACKET3_SET_CONTEXT_REG,
3211 							  ext->reg_count));
3212 				amdgpu_ring_write(ring, ext->reg_index -
3213 						  PACKET3_SET_CONTEXT_REG_START);
3214 				for (i = 0; i < ext->reg_count; i++)
3215 					amdgpu_ring_write(ring, ext->extent[i]);
3216 			}
3217 		}
3218 	}
3219 
3220 	ctx_reg_offset =
3221 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3222 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3223 	amdgpu_ring_write(ring, ctx_reg_offset);
3224 	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3225 
3226 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3227 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3228 
3229 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3230 	amdgpu_ring_write(ring, 0);
3231 
3232 	amdgpu_ring_commit(ring);
3233 
3234 	/* submit cs packet to copy state 0 to next available state */
3235 	if (adev->gfx.num_gfx_rings > 1) {
3236 		/* maximum supported gfx ring is 2 */
3237 		ring = &adev->gfx.gfx_ring[1];
3238 		r = amdgpu_ring_alloc(ring, 2);
3239 		if (r) {
3240 			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3241 			return r;
3242 		}
3243 
3244 		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3245 		amdgpu_ring_write(ring, 0);
3246 
3247 		amdgpu_ring_commit(ring);
3248 	}
3249 	return 0;
3250 }
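
/*
 * gfx_v11_0_cp_gfx_start() above streams the gfx11 clear-state sections
 * (gfx11_cs_data) to ring 0 as SET_CONTEXT_REG packets bracketed by
 * PREAMBLE begin/end markers, overrides PA_SC_TILE_STEERING_OVERRIDE with
 * the value computed during setup, and finishes with CLEAR_STATE,
 * presumably capturing the result as context state 0.  A second gfx ring,
 * when present, only needs its own CLEAR_STATE to pick up that state.
 */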
3251 
3252 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3253 					 CP_PIPE_ID pipe)
3254 {
3255 	u32 tmp;
3256 
3257 	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3258 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3259 
3260 	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3261 }
3262 
3263 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3264 					  struct amdgpu_ring *ring)
3265 {
3266 	u32 tmp;
3267 
3268 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3269 	if (ring->use_doorbell) {
3270 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3271 				    DOORBELL_OFFSET, ring->doorbell_index);
3272 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3273 				    DOORBELL_EN, 1);
3274 	} else {
3275 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3276 				    DOORBELL_EN, 0);
3277 	}
3278 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3279 
3280 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3281 			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
3282 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3283 
3284 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3285 		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3286 }
3287 
3288 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3289 {
3290 	struct amdgpu_ring *ring;
3291 	u32 tmp;
3292 	u32 rb_bufsz;
3293 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3294 	u32 i;
3295 
3296 	/* Set the write pointer delay */
3297 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3298 
3299 	/* set the RB to use vmid 0 */
3300 	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3301 
3302 	/* Init gfx ring 0 for pipe 0 */
3303 	mutex_lock(&adev->srbm_mutex);
3304 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3305 
3306 	/* Set ring buffer size */
3307 	ring = &adev->gfx.gfx_ring[0];
3308 	rb_bufsz = order_base_2(ring->ring_size / 8);
3309 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3310 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3311 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3312 
3313 	/* Initialize the ring buffer's write pointers */
3314 	ring->wptr = 0;
3315 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3316 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3317 
3318 	/* set the wb address whether it's enabled or not */
3319 	rptr_addr = ring->rptr_gpu_addr;
3320 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3321 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3322 		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3323 
3324 	wptr_gpu_addr = ring->wptr_gpu_addr;
3325 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3326 		     lower_32_bits(wptr_gpu_addr));
3327 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3328 		     upper_32_bits(wptr_gpu_addr));
3329 
3330 	mdelay(1);
3331 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3332 
3333 	rb_addr = ring->gpu_addr >> 8;
3334 	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3335 	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3336 
3337 	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3338 
3339 	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3340 	mutex_unlock(&adev->srbm_mutex);
3341 
3342 	/* Init gfx ring 1 for pipe 1 */
3343 	if (adev->gfx.num_gfx_rings > 1) {
3344 		mutex_lock(&adev->srbm_mutex);
3345 		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3346 		/* maximum supported gfx ring is 2 */
3347 		ring = &adev->gfx.gfx_ring[1];
3348 		rb_bufsz = order_base_2(ring->ring_size / 8);
3349 		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3350 		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3351 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3352 		/* Initialize the ring buffer's write pointers */
3353 		ring->wptr = 0;
3354 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3355 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3356 		/* Set the wb address whether it's enabled or not */
3357 		rptr_addr = ring->rptr_gpu_addr;
3358 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3359 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3360 			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3361 		wptr_gpu_addr = ring->wptr_gpu_addr;
3362 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3363 			     lower_32_bits(wptr_gpu_addr));
3364 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3365 			     upper_32_bits(wptr_gpu_addr));
3366 
3367 		mdelay(1);
3368 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3369 
3370 		rb_addr = ring->gpu_addr >> 8;
3371 		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3372 		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3373 		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3374 
3375 		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3376 		mutex_unlock(&adev->srbm_mutex);
3377 	}
3378 	/* Switch to pipe 0 */
3379 	mutex_lock(&adev->srbm_mutex);
3380 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3381 	mutex_unlock(&adev->srbm_mutex);
3382 
3383 	/* start the ring */
3384 	gfx_v11_0_cp_gfx_start(adev);
3385 
3386 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3387 		ring = &adev->gfx.gfx_ring[i];
3388 		ring->sched.ready = true;
3389 	}
3390 
3391 	return 0;
3392 }
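
/*
 * The RB_BUFSZ value programmed above is log2 based:
 * order_base_2(ring_size / 8) suggests the field counts the ring in
 * 8-byte units, e.g. a 64 KiB ring gives 8192 qwords and RB_BUFSZ = 13,
 * with RB_BLKSZ set two steps smaller.
 */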
3393 
3394 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3395 {
3396 	u32 data;
3397 
3398 	if (adev->gfx.rs64_enable) {
3399 		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3400 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3401 							 enable ? 0 : 1);
3402 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3403 							 enable ? 0 : 1);
3404 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3405 							 enable ? 0 : 1);
3406 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3407 							 enable ? 0 : 1);
3408 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3409 							 enable ? 0 : 1);
3410 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3411 							 enable ? 1 : 0);
3412 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3413 							 enable ? 1 : 0);
3414 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3415 							 enable ? 1 : 0);
3416 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3417 							 enable ? 1 : 0);
3418 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3419 							 enable ? 0 : 1);
3420 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3421 	} else {
3422 		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3423 
3424 		if (enable) {
3425 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3426 			if (!adev->enable_mes_kiq)
3427 				data = REG_SET_FIELD(data, CP_MEC_CNTL,
3428 						     MEC_ME2_HALT, 0);
3429 		} else {
3430 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3431 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3432 		}
3433 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3434 	}
3435 
3436 	adev->gfx.kiq.ring.sched.ready = enable;
3437 
3438 	udelay(50);
3439 }
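
/*
 * For the RS64 micro engine the enable path above clears the per-pipe
 * reset and icache-invalidate bits and sets all four MEC_PIPEx_ACTIVE bits
 * in a single CP_MEC_RS64_CNTL write, while the legacy path only toggles
 * the MEC_ME1/ME2 halt bits (ME2 stays halted when the MES KIQ is in use,
 * presumably because MES drives it).  The udelay(50) gives the engine a
 * short settle time either way, and the KIQ ring's scheduler readiness
 * tracks the enable state.
 */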
3440 
3441 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3442 {
3443 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3444 	const __le32 *fw_data;
3445 	unsigned i, fw_size;
3446 	u32 *fw = NULL;
3447 	int r;
3448 
3449 	if (!adev->gfx.mec_fw)
3450 		return -EINVAL;
3451 
3452 	gfx_v11_0_cp_compute_enable(adev, false);
3453 
3454 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3455 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3456 
3457 	fw_data = (const __le32 *)
3458 		(adev->gfx.mec_fw->data +
3459 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3460 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3461 
3462 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3463 					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3464 					  &adev->gfx.mec.mec_fw_obj,
3465 					  &adev->gfx.mec.mec_fw_gpu_addr,
3466 					  (void **)&fw);
3467 	if (r) {
3468 		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3469 		gfx_v11_0_mec_fini(adev);
3470 		return r;
3471 	}
3472 
3473 	memcpy(fw, fw_data, fw_size);
3474 
3475 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3476 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3477 
3478 	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3479 
3480 	/* MEC1 */
3481 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3482 
3483 	for (i = 0; i < mec_hdr->jt_size; i++)
3484 		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3485 			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3486 
3487 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3488 
3489 	return 0;
3490 }
3491 
3492 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3493 {
3494 	const struct gfx_firmware_header_v2_0 *mec_hdr;
3495 	const __le32 *fw_ucode, *fw_data;
3496 	u32 tmp, fw_ucode_size, fw_data_size;
3497 	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3498 	u32 *fw_ucode_ptr, *fw_data_ptr;
3499 	int r;
3500 
3501 	if (!adev->gfx.mec_fw)
3502 		return -EINVAL;
3503 
3504 	gfx_v11_0_cp_compute_enable(adev, false);
3505 
3506 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3507 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3508 
3509 	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3510 				le32_to_cpu(mec_hdr->ucode_offset_bytes));
3511 	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3512 
3513 	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3514 				le32_to_cpu(mec_hdr->data_offset_bytes));
3515 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3516 
3517 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3518 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3519 				      &adev->gfx.mec.mec_fw_obj,
3520 				      &adev->gfx.mec.mec_fw_gpu_addr,
3521 				      (void **)&fw_ucode_ptr);
3522 	if (r) {
3523 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3524 		gfx_v11_0_mec_fini(adev);
3525 		return r;
3526 	}
3527 
3528 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3529 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3530 				      &adev->gfx.mec.mec_fw_data_obj,
3531 				      &adev->gfx.mec.mec_fw_data_gpu_addr,
3532 				      (void **)&fw_data_ptr);
3533 	if (r) {
3534 		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3535 		gfx_v11_0_mec_fini(adev);
3536 		return r;
3537 	}
3538 
3539 	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3540 	memcpy(fw_data_ptr, fw_data, fw_data_size);
3541 
3542 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3543 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3544 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3545 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3546 
3547 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3548 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3549 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3550 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3551 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3552 
3553 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3554 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3555 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3556 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3557 
3558 	mutex_lock(&adev->srbm_mutex);
3559 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3560 		soc21_grbm_select(adev, 1, i, 0, 0);
3561 
3562 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3563 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3564 		     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3565 
3566 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3567 					mec_hdr->ucode_start_addr_lo >> 2 |
3568 					mec_hdr->ucode_start_addr_hi << 30);
3569 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3570 					mec_hdr->ucode_start_addr_hi >> 2);
3571 
3572 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3573 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3574 		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3575 	}
3576 	soc21_grbm_select(adev, 0, 0, 0, 0);
3577 	mutex_unlock(&adev->srbm_mutex);
3578 
3579 	/* Trigger an invalidation of the MEC data cache */
3580 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3581 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3582 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3583 
3584 	/* Wait for invalidation complete */
3585 	for (i = 0; i < usec_timeout; i++) {
3586 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3587 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3588 				       INVALIDATE_DCACHE_COMPLETE))
3589 			break;
3590 		udelay(1);
3591 	}
3592 
3593 	if (i >= usec_timeout) {
3594 		dev_err(adev->dev, "failed to invalidate MEC data cache\n");
3595 		return -EINVAL;
3596 	}
3597 
3598 	/* Trigger an invalidation of the L1 instruction caches */
3599 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3600 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3601 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3602 
3603 	/* Wait for invalidation complete */
3604 	for (i = 0; i < usec_timeout; i++) {
3605 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3606 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3607 				       INVALIDATE_CACHE_COMPLETE))
3608 			break;
3609 		udelay(1);
3610 	}
3611 
3612 	if (i >= usec_timeout) {
3613 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3614 		return -EINVAL;
3615 	}
3616 
3617 	return 0;
3618 }
3619 
3620 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3621 {
3622 	uint32_t tmp;
3623 	struct amdgpu_device *adev = ring->adev;
3624 
3625 	/* tell RLC which queue is the KIQ */
3626 	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3627 	tmp &= 0xffffff00;
3628 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3629 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3630 	tmp |= 0x80;
3631 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3632 }
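
/*
 * The RLC_CP_SCHEDULERS write above packs the KIQ identity into the low
 * byte: queue in bits [2:0], pipe in bits [4:3] and me above that, written
 * first without and then with bit 7 (0x80), which presumably acts as the
 * "KIQ valid" flag for the RLC.
 */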
3633 
3634 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3635 {
3636 	/* set graphics engine doorbell range */
3637 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3638 		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
3639 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3640 		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3641 
3642 	/* set compute engine doorbell range */
3643 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3644 		     (adev->doorbell_index.kiq * 2) << 2);
3645 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3646 		     (adev->doorbell_index.userqueue_end * 2) << 2);
3647 }
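
/*
 * The doorbell range registers above take byte offsets into the doorbell
 * BAR; the adev->doorbell_index values appear to be 64-bit doorbell slots,
 * so "* 2" converts them to dword indices and "<< 2" to bytes, e.g. index
 * 4 becomes (4 * 2) << 2 = 0x20.
 */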
3648 
3649 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3650 				  struct amdgpu_mqd_prop *prop)
3651 {
3652 	struct v11_gfx_mqd *mqd = m;
3653 	uint64_t hqd_gpu_addr, wb_gpu_addr;
3654 	uint32_t tmp;
3655 	uint32_t rb_bufsz;
3656 
3657 	/* set up gfx hqd wptr */
3658 	mqd->cp_gfx_hqd_wptr = 0;
3659 	mqd->cp_gfx_hqd_wptr_hi = 0;
3660 
3661 	/* set the pointer to the MQD */
3662 	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3663 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3664 
3665 	/* set up mqd control */
3666 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3667 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3668 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3669 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3670 	mqd->cp_gfx_mqd_control = tmp;
3671 
3672 	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3673 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3674 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3675 	mqd->cp_gfx_hqd_vmid = 0;
3676 
3677 	/* set up default queue priority level
3678 	 * 0x0 = low priority, 0x1 = high priority */
3679 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3680 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
3681 	mqd->cp_gfx_hqd_queue_priority = tmp;
3682 
3683 	/* set up time quantum */
3684 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3685 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
3686 	mqd->cp_gfx_hqd_quantum = tmp;
3687 
3688 	/* set up gfx hqd base. this is similar to CP_RB_BASE */
3689 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3690 	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
3691 	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
3692 
3693 	/* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
3694 	wb_gpu_addr = prop->rptr_gpu_addr;
3695 	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
3696 	mqd->cp_gfx_hqd_rptr_addr_hi =
3697 		upper_32_bits(wb_gpu_addr) & 0xffff;
3698 
3699 	/* set up rb_wptr_poll addr */
3700 	wb_gpu_addr = prop->wptr_gpu_addr;
3701 	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3702 	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3703 
3704 	/* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
3705 	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
3706 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
3707 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
3708 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
3709 #ifdef __BIG_ENDIAN
3710 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3711 #endif
3712 	mqd->cp_gfx_hqd_cntl = tmp;
3713 
3714 	/* set up cp_doorbell_control */
3715 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3716 	if (prop->use_doorbell) {
3717 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3718 				    DOORBELL_OFFSET, prop->doorbell_index);
3719 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3720 				    DOORBELL_EN, 1);
3721 	} else
3722 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3723 				    DOORBELL_EN, 0);
3724 	mqd->cp_rb_doorbell_control = tmp;
3725 
3726 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3727 	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
3728 
3729 	/* activate the queue */
3730 	mqd->cp_gfx_hqd_active = 1;
3731 
3732 	return 0;
3733 }
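
/*
 * The MQD (memory queue descriptor) filled in above mirrors the
 * CP_GFX_HQD_* registers so the queue state can be (re)loaded from memory.
 * The RB_BUFSZ field is again log2 encoded: order_base_2(queue_size / 4) - 1,
 * e.g. a 4 KiB queue gives 1024 dwords and RB_BUFSZ = 9, the same value the
 * order_base_2(ring_size / 8) form in gfx_v11_0_cp_gfx_resume() produces
 * for the same size.
 */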
3734 
3735 #ifdef BRING_UP_DEBUG
3736 static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
3737 {
3738 	struct amdgpu_device *adev = ring->adev;
3739 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3740 
3741 	/* set mmCP_GFX_HQD_WPTR/_HI to 0 */
3742 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
3743 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
3744 
3745 	/* set GFX_MQD_BASE */
3746 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
3747 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3748 
3749 	/* set GFX_MQD_CONTROL */
3750 	WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
3751 
3752 	/* set GFX_HQD_VMID to 0 */
3753 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
3754 
3755 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
3756 			mqd->cp_gfx_hqd_queue_priority);
3757 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
3758 
3759 	/* set GFX_HQD_BASE, similar to CP_RB_BASE */
3760 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
3761 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
3762 
3763 	/* set GFX_HQD_RPTR_ADDR, similar to CP_RB_RPTR */
3764 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
3765 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
3766 
3767 	/* set GFX_HQD_CNTL, similar to CP_RB_CNTL */
3768 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
3769 
3770 	/* set RB_WPTR_POLL_ADDR */
3771 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
3772 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
3773 
3774 	/* set RB_DOORBELL_CONTROL */
3775 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
3776 
3777 	/* activate the queue */
3778 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
3779 
3780 	return 0;
3781 }
3782 #endif
3783 
3784 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3785 {
3786 	struct amdgpu_device *adev = ring->adev;
3787 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3788 	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
3789 
3790 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3791 		memset((void *)mqd, 0, sizeof(*mqd));
3792 		mutex_lock(&adev->srbm_mutex);
3793 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3794 		amdgpu_ring_init_mqd(ring);
3795 #ifdef BRING_UP_DEBUG
3796 		gfx_v11_0_gfx_queue_init_register(ring);
3797 #endif
3798 		soc21_grbm_select(adev, 0, 0, 0, 0);
3799 		mutex_unlock(&adev->srbm_mutex);
3800 		if (adev->gfx.me.mqd_backup[mqd_idx])
3801 			memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3802 	} else if (amdgpu_in_reset(adev)) {
3803 		/* reset mqd with the backup copy */
3804 		if (adev->gfx.me.mqd_backup[mqd_idx])
3805 			memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
3806 		/* reset the ring */
3807 		ring->wptr = 0;
3808 		*ring->wptr_cpu_addr = 0;
3809 		amdgpu_ring_clear_ring(ring);
3810 #ifdef BRING_UP_DEBUG
3811 		mutex_lock(&adev->srbm_mutex);
3812 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3813 		gfx_v11_0_gfx_queue_init_register(ring);
3814 		soc21_grbm_select(adev, 0, 0, 0, 0);
3815 		mutex_unlock(&adev->srbm_mutex);
3816 #endif
3817 	} else {
3818 		amdgpu_ring_clear_ring(ring);
3819 	}
3820 
3821 	return 0;
3822 }
3823 
3824 #ifndef BRING_UP_DEBUG
3825 static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev)
3826 {
3827 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3828 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3829 	int r, i;
3830 
3831 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
3832 		return -EINVAL;
3833 
3834 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
3835 					adev->gfx.num_gfx_rings);
3836 	if (r) {
3837 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3838 		return r;
3839 	}
3840 
3841 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3842 		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
3843 
3844 	return amdgpu_ring_test_helper(kiq_ring);
3845 }
3846 #endif
3847 
3848 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3849 {
3850 	int r, i;
3851 	struct amdgpu_ring *ring;
3852 
3853 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3854 		ring = &adev->gfx.gfx_ring[i];
3855 
3856 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3857 		if (unlikely(r != 0))
3858 			goto done;
3859 
3860 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3861 		if (!r) {
3862 			r = gfx_v11_0_gfx_init_queue(ring);
3863 			amdgpu_bo_kunmap(ring->mqd_obj);
3864 			ring->mqd_ptr = NULL;
3865 		}
3866 		amdgpu_bo_unreserve(ring->mqd_obj);
3867 		if (r)
3868 			goto done;
3869 	}
3870 #ifndef BRING_UP_DEBUG
3871 	r = gfx_v11_0_kiq_enable_kgq(adev);
3872 	if (r)
3873 		goto done;
3874 #endif
3875 	r = gfx_v11_0_cp_gfx_start(adev);
3876 	if (r)
3877 		goto done;
3878 
3879 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3880 		ring = &adev->gfx.gfx_ring[i];
3881 		ring->sched.ready = true;
3882 	}
3883 done:
3884 	return r;
3885 }
3886 
3887 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
3888 				      struct amdgpu_mqd_prop *prop)
3889 {
3890 	struct v11_compute_mqd *mqd = m;
3891 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3892 	uint32_t tmp;
3893 
3894 	mqd->header = 0xC0310800;
3895 	mqd->compute_pipelinestat_enable = 0x00000001;
3896 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3897 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3898 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3899 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3900 	mqd->compute_misc_reserved = 0x00000007;
3901 
3902 	eop_base_addr = prop->eop_gpu_addr >> 8;
3903 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3904 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3905 
3906 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3907 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3908 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3909 			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
3910 
3911 	mqd->cp_hqd_eop_control = tmp;
3912 
3913 	/* enable doorbell? */
3914 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3915 
3916 	if (prop->use_doorbell) {
3917 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3918 				    DOORBELL_OFFSET, prop->doorbell_index);
3919 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3920 				    DOORBELL_EN, 1);
3921 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3922 				    DOORBELL_SOURCE, 0);
3923 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3924 				    DOORBELL_HIT, 0);
3925 	} else {
3926 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3927 				    DOORBELL_EN, 0);
3928 	}
3929 
3930 	mqd->cp_hqd_pq_doorbell_control = tmp;
3931 
3932 	/* disable the queue if it's active */
3933 	mqd->cp_hqd_dequeue_request = 0;
3934 	mqd->cp_hqd_pq_rptr = 0;
3935 	mqd->cp_hqd_pq_wptr_lo = 0;
3936 	mqd->cp_hqd_pq_wptr_hi = 0;
3937 
3938 	/* set the pointer to the MQD */
3939 	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3940 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3941 
3942 	/* set MQD vmid to 0 */
3943 	tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3944 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3945 	mqd->cp_mqd_control = tmp;
3946 
3947 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3948 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3949 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3950 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3951 
3952 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3953 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3954 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3955 			    (order_base_2(prop->queue_size / 4) - 1));
3956 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3957 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3958 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3959 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3960 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3961 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3962 	mqd->cp_hqd_pq_control = tmp;
3963 
3964 	/* set the wb address whether it's enabled or not */
3965 	wb_gpu_addr = prop->rptr_gpu_addr;
3966 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3967 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3968 		upper_32_bits(wb_gpu_addr) & 0xffff;
3969 
3970 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3971 	wb_gpu_addr = prop->wptr_gpu_addr;
3972 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3973 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3974 
3975 	tmp = 0;
3976 	/* enable the doorbell if requested */
3977 	if (prop->use_doorbell) {
3978 		tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3979 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3980 				DOORBELL_OFFSET, prop->doorbell_index);
3981 
3982 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3983 				    DOORBELL_EN, 1);
3984 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3985 				    DOORBELL_SOURCE, 0);
3986 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3987 				    DOORBELL_HIT, 0);
3988 	}
3989 
3990 	mqd->cp_hqd_pq_doorbell_control = tmp;
3991 
3992 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3993 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3994 
3995 	/* set the vmid for the queue */
3996 	mqd->cp_hqd_vmid = 0;
3997 
3998 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3999 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
4000 	mqd->cp_hqd_persistent_state = tmp;
4001 
4002 	/* set MIN_IB_AVAIL_SIZE */
4003 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
4004 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4005 	mqd->cp_hqd_ib_control = tmp;
4006 
4007 	/* set static priority for a compute queue/ring */
4008 	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
4009 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
4010 
4011 	mqd->cp_hqd_active = prop->hqd_active;
4012 
4013 	return 0;
4014 }
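
/*
 * Worked example for the EOP sizing above: a 2 KiB EOP buffer is 512
 * dwords, order_base_2(512) - 1 = 8, and 2^(8+1) = 512 dwords, matching
 * the "2^(EOP_SIZE+1) dwords" convention noted in the comment.
 */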
4015 
4016 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
4017 {
4018 	struct amdgpu_device *adev = ring->adev;
4019 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4020 	int j;
4021 
4022 	/* inactivate the queue */
4023 	if (amdgpu_sriov_vf(adev))
4024 		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
4025 
4026 	/* disable wptr polling */
4027 	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4028 
4029 	/* write the EOP addr */
4030 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
4031 	       mqd->cp_hqd_eop_base_addr_lo);
4032 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
4033 	       mqd->cp_hqd_eop_base_addr_hi);
4034 
4035 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4036 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
4037 	       mqd->cp_hqd_eop_control);
4038 
4039 	/* enable doorbell? */
4040 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4041 	       mqd->cp_hqd_pq_doorbell_control);
4042 
4043 	/* disable the queue if it's active */
4044 	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
4045 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
4046 		for (j = 0; j < adev->usec_timeout; j++) {
4047 			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
4048 				break;
4049 			udelay(1);
4050 		}
4051 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
4052 		       mqd->cp_hqd_dequeue_request);
4053 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
4054 		       mqd->cp_hqd_pq_rptr);
4055 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4056 		       mqd->cp_hqd_pq_wptr_lo);
4057 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4058 		       mqd->cp_hqd_pq_wptr_hi);
4059 	}
4060 
4061 	/* set the pointer to the MQD */
4062 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
4063 	       mqd->cp_mqd_base_addr_lo);
4064 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
4065 	       mqd->cp_mqd_base_addr_hi);
4066 
4067 	/* set MQD vmid to 0 */
4068 	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
4069 	       mqd->cp_mqd_control);
4070 
4071 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4072 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
4073 	       mqd->cp_hqd_pq_base_lo);
4074 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
4075 	       mqd->cp_hqd_pq_base_hi);
4076 
4077 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4078 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
4079 	       mqd->cp_hqd_pq_control);
4080 
4081 	/* set the wb address whether it's enabled or not */
4082 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
4083 		mqd->cp_hqd_pq_rptr_report_addr_lo);
4084 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4085 		mqd->cp_hqd_pq_rptr_report_addr_hi);
4086 
4087 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4088 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
4089 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
4090 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4091 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4092 
4093 	/* enable the doorbell if requested */
4094 	if (ring->use_doorbell) {
4095 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
4096 			(adev->doorbell_index.kiq * 2) << 2);
4097 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
4098 			(adev->doorbell_index.userqueue_end * 2) << 2);
4099 	}
4100 
4101 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4102 	       mqd->cp_hqd_pq_doorbell_control);
4103 
4104 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4105 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4106 	       mqd->cp_hqd_pq_wptr_lo);
4107 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4108 	       mqd->cp_hqd_pq_wptr_hi);
4109 
4110 	/* set the vmid for the queue */
4111 	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
4112 
4113 	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
4114 	       mqd->cp_hqd_persistent_state);
4115 
4116 	/* activate the queue */
4117 	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
4118 	       mqd->cp_hqd_active);
4119 
4120 	if (ring->use_doorbell)
4121 		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4122 
4123 	return 0;
4124 }
4125 
4126 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
4127 {
4128 	struct amdgpu_device *adev = ring->adev;
4129 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4130 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4131 
4132 	gfx_v11_0_kiq_setting(ring);
4133 
4134 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4135 		/* reset MQD to a clean status */
4136 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4137 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4138 
4139 		/* reset ring buffer */
4140 		ring->wptr = 0;
4141 		amdgpu_ring_clear_ring(ring);
4142 
4143 		mutex_lock(&adev->srbm_mutex);
4144 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4145 		gfx_v11_0_kiq_init_register(ring);
4146 		soc21_grbm_select(adev, 0, 0, 0, 0);
4147 		mutex_unlock(&adev->srbm_mutex);
4148 	} else {
4149 		memset((void *)mqd, 0, sizeof(*mqd));
4150 		mutex_lock(&adev->srbm_mutex);
4151 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4152 		amdgpu_ring_init_mqd(ring);
4153 		gfx_v11_0_kiq_init_register(ring);
4154 		soc21_grbm_select(adev, 0, 0, 0, 0);
4155 		mutex_unlock(&adev->srbm_mutex);
4156 
4157 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4158 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4159 	}
4160 
4161 	return 0;
4162 }
4163 
4164 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4165 {
4166 	struct amdgpu_device *adev = ring->adev;
4167 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4168 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4169 
4170 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4171 		memset((void *)mqd, 0, sizeof(*mqd));
4172 		mutex_lock(&adev->srbm_mutex);
4173 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4174 		amdgpu_ring_init_mqd(ring);
4175 		soc21_grbm_select(adev, 0, 0, 0, 0);
4176 		mutex_unlock(&adev->srbm_mutex);
4177 
4178 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4179 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4180 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4181 		/* reset MQD to a clean status */
4182 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4183 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4184 
4185 		/* reset ring buffer */
4186 		ring->wptr = 0;
4187 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4188 		amdgpu_ring_clear_ring(ring);
4189 	} else {
4190 		amdgpu_ring_clear_ring(ring);
4191 	}
4192 
4193 	return 0;
4194 }
4195 
4196 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4197 {
4198 	struct amdgpu_ring *ring;
4199 	int r;
4200 
4201 	ring = &adev->gfx.kiq.ring;
4202 
4203 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4204 	if (unlikely(r != 0))
4205 		return r;
4206 
4207 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4208 	if (unlikely(r != 0)) {
4209 		amdgpu_bo_unreserve(ring->mqd_obj);
4210 		return r;
4211 	}
4212 
4213 	gfx_v11_0_kiq_init_queue(ring);
4214 	amdgpu_bo_kunmap(ring->mqd_obj);
4215 	ring->mqd_ptr = NULL;
4216 	amdgpu_bo_unreserve(ring->mqd_obj);
4217 	ring->sched.ready = true;
4218 	return 0;
4219 }
4220 
4221 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4222 {
4223 	struct amdgpu_ring *ring = NULL;
4224 	int r = 0, i;
4225 
4226 	if (!amdgpu_async_gfx_ring)
4227 		gfx_v11_0_cp_compute_enable(adev, true);
4228 
4229 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4230 		ring = &adev->gfx.compute_ring[i];
4231 
4232 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4233 		if (unlikely(r != 0))
4234 			goto done;
4235 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4236 		if (!r) {
4237 			r = gfx_v11_0_kcq_init_queue(ring);
4238 			amdgpu_bo_kunmap(ring->mqd_obj);
4239 			ring->mqd_ptr = NULL;
4240 		}
4241 		amdgpu_bo_unreserve(ring->mqd_obj);
4242 		if (r)
4243 			goto done;
4244 	}
4245 
4246 	r = amdgpu_gfx_enable_kcq(adev);
4247 done:
4248 	return r;
4249 }
4250 
4251 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4252 {
4253 	int r, i;
4254 	struct amdgpu_ring *ring;
4255 
4256 	if (!(adev->flags & AMD_IS_APU))
4257 		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4258 
4259 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4260 		/* legacy firmware loading */
4261 		r = gfx_v11_0_cp_gfx_load_microcode(adev);
4262 		if (r)
4263 			return r;
4264 
4265 		if (adev->gfx.rs64_enable)
4266 			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4267 		else
4268 			r = gfx_v11_0_cp_compute_load_microcode(adev);
4269 		if (r)
4270 			return r;
4271 	}
4272 
4273 	gfx_v11_0_cp_set_doorbell_range(adev);
4274 
4275 	if (amdgpu_async_gfx_ring) {
4276 		gfx_v11_0_cp_compute_enable(adev, true);
4277 		gfx_v11_0_cp_gfx_enable(adev, true);
4278 	}
4279 
4280 	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4281 		r = amdgpu_mes_kiq_hw_init(adev);
4282 	else
4283 		r = gfx_v11_0_kiq_resume(adev);
4284 	if (r)
4285 		return r;
4286 
4287 	r = gfx_v11_0_kcq_resume(adev);
4288 	if (r)
4289 		return r;
4290 
4291 	if (!amdgpu_async_gfx_ring) {
4292 		r = gfx_v11_0_cp_gfx_resume(adev);
4293 		if (r)
4294 			return r;
4295 	} else {
4296 		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4297 		if (r)
4298 			return r;
4299 	}
4300 
4301 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4302 		ring = &adev->gfx.gfx_ring[i];
4303 		r = amdgpu_ring_test_helper(ring);
4304 		if (r)
4305 			return r;
4306 	}
4307 
4308 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4309 		ring = &adev->gfx.compute_ring[i];
4310 		r = amdgpu_ring_test_helper(ring);
4311 		if (r)
4312 			return r;
4313 	}
4314 
4315 	return 0;
4316 }
4317 
4318 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4319 {
4320 	gfx_v11_0_cp_gfx_enable(adev, enable);
4321 	gfx_v11_0_cp_compute_enable(adev, enable);
4322 }
4323 
4324 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4325 {
4326 	int r;
4327 	bool value;
4328 
4329 	r = adev->gfxhub.funcs->gart_enable(adev);
4330 	if (r)
4331 		return r;
4332 
4333 	adev->hdp.funcs->flush_hdp(adev, NULL);
4334 
4335 	value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
4336 		false : true;
4337 
4338 	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4339 	amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
4340 
4341 	return 0;
4342 }
4343 
4344 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4345 {
4346 	u32 tmp;
4347 
4348 	/* select RS64 */
4349 	if (adev->gfx.rs64_enable) {
4350 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4351 		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4352 		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4353 
4354 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4355 		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4356 		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4357 	}
4358 
4359 	if (amdgpu_emu_mode == 1)
4360 		msleep(100);
4361 }
4362 
4363 static int get_gb_addr_config(struct amdgpu_device *adev)
4364 {
4365 	u32 gb_addr_config;
4366 
4367 	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4368 	if (gb_addr_config == 0)
4369 		return -EINVAL;
4370 
4371 	adev->gfx.config.gb_addr_config_fields.num_pkrs =
4372 		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4373 
4374 	adev->gfx.config.gb_addr_config = gb_addr_config;
4375 
4376 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4377 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4378 				      GB_ADDR_CONFIG, NUM_PIPES);
4379 
4380 	adev->gfx.config.max_tile_pipes =
4381 		adev->gfx.config.gb_addr_config_fields.num_pipes;
4382 
4383 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4384 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4385 				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4386 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4387 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4388 				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
4389 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4390 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4391 				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4392 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4393 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4394 				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4395 
4396 	return 0;
4397 }
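
/*
 * The GB_ADDR_CONFIG fields decoded above are log2 encoded, hence the
 * "1 << REG_GET_FIELD(...)" pattern: a NUM_PIPES field value of 3 means
 * 8 pipes, and PIPE_INTERLEAVE_SIZE is offset by 8, so a field value of 0
 * corresponds to a 256-byte (1 << 8) interleave.
 */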
4398 
4399 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4400 {
4401 	uint32_t data;
4402 
4403 	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4404 	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4405 	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4406 
4407 	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4408 	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4409 	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4410 }
4411 
4412 static int gfx_v11_0_hw_init(void *handle)
4413 {
4414 	int r;
4415 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4416 
4417 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4418 		if (adev->gfx.imu.funcs) {
4419 			/* RLC autoload sequence 1: Program rlc ram */
4420 			if (adev->gfx.imu.funcs->program_rlc_ram)
4421 				adev->gfx.imu.funcs->program_rlc_ram(adev);
4422 		}
4423 		/* rlc autoload firmware */
4424 		r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4425 		if (r)
4426 			return r;
4427 	} else {
4428 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4429 			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4430 				if (adev->gfx.imu.funcs->load_microcode)
4431 					adev->gfx.imu.funcs->load_microcode(adev);
4432 				if (adev->gfx.imu.funcs->setup_imu)
4433 					adev->gfx.imu.funcs->setup_imu(adev);
4434 				if (adev->gfx.imu.funcs->start_imu)
4435 					adev->gfx.imu.funcs->start_imu(adev);
4436 			}
4437 
4438 			/* disable gpa mode in backdoor loading */
4439 			gfx_v11_0_disable_gpa_mode(adev);
4440 		}
4441 	}
4442 
4443 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4444 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4445 		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4446 		if (r) {
4447 			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4448 			return r;
4449 		}
4450 	}
4451 
4452 	adev->gfx.is_poweron = true;
4453 
4454 	if (get_gb_addr_config(adev))
4455 		DRM_WARN("Invalid gb_addr_config!\n");
4456 
4457 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4458 	    adev->gfx.rs64_enable)
4459 		gfx_v11_0_config_gfx_rs64(adev);
4460 
4461 	r = gfx_v11_0_gfxhub_enable(adev);
4462 	if (r)
4463 		return r;
4464 
4465 	if (!amdgpu_emu_mode)
4466 		gfx_v11_0_init_golden_registers(adev);
4467 
4468 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4469 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4470 		/*
4471 		 * For gfx 11, rlc firmware loading relies on the smu firmware
4472 		 * being loaded first, so in the direct type, the smc ucode has
4473 		 * to be loaded here before the rlc.
4474 		 */
4475 		if (!(adev->flags & AMD_IS_APU)) {
4476 			r = amdgpu_pm_load_smu_firmware(adev, NULL);
4477 			if (r)
4478 				return r;
4479 		}
4480 	}
4481 
4482 	gfx_v11_0_constants_init(adev);
4483 
4484 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4485 		gfx_v11_0_select_cp_fw_arch(adev);
4486 
4487 	if (adev->nbio.funcs->gc_doorbell_init)
4488 		adev->nbio.funcs->gc_doorbell_init(adev);
4489 
4490 	r = gfx_v11_0_rlc_resume(adev);
4491 	if (r)
4492 		return r;
4493 
4494 	/*
4495 	 * golden register init and rlc resume may override some registers,
4496 	 * so reconfigure them here
4497 	 */
4498 	gfx_v11_0_tcp_harvest(adev);
4499 
4500 	r = gfx_v11_0_cp_resume(adev);
4501 	if (r)
4502 		return r;
4503 
4504 	return r;
4505 }
4506 
4507 #ifndef BRING_UP_DEBUG
4508 static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
4509 {
4510 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4511 	struct amdgpu_ring *kiq_ring = &kiq->ring;
4512 	int i, r = 0;
4513 
4514 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4515 		return -EINVAL;
4516 
4517 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
4518 					adev->gfx.num_gfx_rings))
4519 		return -ENOMEM;
4520 
4521 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4522 		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
4523 					   PREEMPT_QUEUES, 0, 0);
4524 
4525 	if (adev->gfx.kiq.ring.sched.ready)
4526 		r = amdgpu_ring_test_helper(kiq_ring);
4527 
4528 	return r;
4529 }
4530 #endif
4531 
4532 static int gfx_v11_0_hw_fini(void *handle)
4533 {
4534 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4535 	int r;
4536 	uint32_t tmp;
4537 
4538 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4539 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4540 
4541 	if (!adev->no_hw_access) {
4542 #ifndef BRING_UP_DEBUG
4543 		if (amdgpu_async_gfx_ring) {
4544 			r = gfx_v11_0_kiq_disable_kgq(adev);
4545 			if (r)
4546 				DRM_ERROR("KGQ disable failed\n");
4547 		}
4548 #endif
4549 		if (amdgpu_gfx_disable_kcq(adev))
4550 			DRM_ERROR("KCQ disable failed\n");
4551 
4552 		amdgpu_mes_kiq_hw_fini(adev);
4553 	}
4554 
4555 	if (amdgpu_sriov_vf(adev)) {
4556 		gfx_v11_0_cp_gfx_enable(adev, false);
4557 		/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
4558 		tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
4559 		tmp &= 0xffffff00;
4560 		WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
4561 
4562 		return 0;
4563 	}
4564 	gfx_v11_0_cp_enable(adev, false);
4565 	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4566 
4567 	adev->gfxhub.funcs->gart_disable(adev);
4568 
4569 	adev->gfx.is_poweron = false;
4570 
4571 	return 0;
4572 }
4573 
4574 static int gfx_v11_0_suspend(void *handle)
4575 {
4576 	return gfx_v11_0_hw_fini(handle);
4577 }
4578 
4579 static int gfx_v11_0_resume(void *handle)
4580 {
4581 	return gfx_v11_0_hw_init(handle);
4582 }
4583 
4584 static bool gfx_v11_0_is_idle(void *handle)
4585 {
4586 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4587 
4588 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4589 				GRBM_STATUS, GUI_ACTIVE))
4590 		return false;
4591 	else
4592 		return true;
4593 }
4594 
4595 static int gfx_v11_0_wait_for_idle(void *handle)
4596 {
4597 	unsigned i;
4598 	u32 tmp;
4599 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4600 
4601 	for (i = 0; i < adev->usec_timeout; i++) {
4602 		/* read GRBM_STATUS */
4603 		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4604 			GRBM_STATUS__GUI_ACTIVE_MASK;
4605 
4606 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4607 			return 0;
4608 		udelay(1);
4609 	}
4610 	return -ETIMEDOUT;
4611 }
4612 
4613 static int gfx_v11_0_soft_reset(void *handle)
4614 {
4615 	u32 grbm_soft_reset = 0;
4616 	u32 tmp;
4617 	int i, j, k;
4618 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4619 
4620 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4621 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4622 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4623 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4624 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4625 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4626 
4627 	gfx_v11_0_set_safe_mode(adev);
4628 
4629 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4630 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4631 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4632 				tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4633 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4634 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4635 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4636 				WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4637 
4638 				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4639 				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4640 			}
4641 		}
4642 	}
4643 	for (i = 0; i < adev->gfx.me.num_me; ++i) {
4644 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4645 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4646 				tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4647 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4648 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4649 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4650 				WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4651 
4652 				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4653 			}
4654 		}
4655 	}
4656 
4657 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4658 
4659 	/* Read the CP_VMID_RESET register three times to give
4660 	 * GFX_HQD_ACTIVE sufficient time to reach 0. */
4661 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4662 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4663 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4664 
4665 	for (i = 0; i < adev->usec_timeout; i++) {
4666 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4667 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4668 			break;
4669 		udelay(1);
4670 	}
4671 	if (i >= adev->usec_timeout) {
4672 		printk("Failed to wait for all pipes to become clean\n");
4673 		return -EINVAL;
4674 	}
4675 
4676 	/**********  trigger soft reset  ***********/
4677 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4678 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4679 					SOFT_RESET_CP, 1);
4680 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4681 					SOFT_RESET_GFX, 1);
4682 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4683 					SOFT_RESET_CPF, 1);
4684 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4685 					SOFT_RESET_CPC, 1);
4686 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4687 					SOFT_RESET_CPG, 1);
4688 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4689 	/**********  exit soft reset  ***********/
4690 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4691 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4692 					SOFT_RESET_CP, 0);
4693 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4694 					SOFT_RESET_GFX, 0);
4695 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4696 					SOFT_RESET_CPF, 0);
4697 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4698 					SOFT_RESET_CPC, 0);
4699 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4700 					SOFT_RESET_CPG, 0);
4701 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4702 
4703 	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4704 	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4705 	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4706 
4707 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4708 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4709 
4710 	for (i = 0; i < adev->usec_timeout; i++) {
4711 		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4712 			break;
4713 		udelay(1);
4714 	}
4715 	if (i >= adev->usec_timeout) {
4716 		printk("Failed to wait for CP_VMID_RESET to clear\n");
4717 		return -EINVAL;
4718 	}
4719 
4720 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4721 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4722 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4723 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4724 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4725 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4726 
4727 	gfx_v11_0_unset_safe_mode(adev);
4728 
4729 	return gfx_v11_0_cp_resume(adev);
4730 }
4731 
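/*
 * Ring-level health check: submit a test IB on every gfx and compute ring;
 * any failure is treated as a hung engine and requests a soft reset.
 */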
4732 static bool gfx_v11_0_check_soft_reset(void *handle)
4733 {
4734 	int i, r;
4735 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4736 	struct amdgpu_ring *ring;
4737 	long tmo = msecs_to_jiffies(1000);
4738 
4739 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4740 		ring = &adev->gfx.gfx_ring[i];
4741 		r = amdgpu_ring_test_ib(ring, tmo);
4742 		if (r)
4743 			return true;
4744 	}
4745 
4746 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4747 		ring = &adev->gfx.compute_ring[i];
4748 		r = amdgpu_ring_test_ib(ring, tmo);
4749 		if (r)
4750 			return true;
4751 	}
4752 
4753 	return false;
4754 }
4755 
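/*
 * Sample the 64-bit GOLDEN_TSC counter. GFXOFF is temporarily disabled and
 * gpu_clock_mutex is held while the LOWER/UPPER halves are read.
 */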
4756 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4757 {
4758 	uint64_t clock;
4759 
4760 	amdgpu_gfx_off_ctrl(adev, false);
4761 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4762 	clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) |
4763 		((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL);
4764 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4765 	amdgpu_gfx_off_ctrl(adev, true);
4766 	return clock;
4767 }
4768 
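/*
 * Program the per-VMID GDS base/size, GWS and OA apertures for this ring
 * via WRITE_DATA packets.
 */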
4769 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4770 					   uint32_t vmid,
4771 					   uint32_t gds_base, uint32_t gds_size,
4772 					   uint32_t gws_base, uint32_t gws_size,
4773 					   uint32_t oa_base, uint32_t oa_size)
4774 {
4775 	struct amdgpu_device *adev = ring->adev;
4776 
4777 	/* GDS Base */
4778 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4779 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
4780 				    gds_base);
4781 
4782 	/* GDS Size */
4783 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4784 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
4785 				    gds_size);
4786 
4787 	/* GWS */
4788 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4789 				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
4790 				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4791 
4792 	/* OA */
4793 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4794 				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
4795 				    (1 << (oa_size + oa_base)) - (1 << oa_base));
4796 }
4797 
4798 static int gfx_v11_0_early_init(void *handle)
4799 {
4800 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4801 
4802 	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
4803 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4804 					  AMDGPU_MAX_COMPUTE_RINGS);
4805 
4806 	gfx_v11_0_set_kiq_pm4_funcs(adev);
4807 	gfx_v11_0_set_ring_funcs(adev);
4808 	gfx_v11_0_set_irq_funcs(adev);
4809 	gfx_v11_0_set_gds_init(adev);
4810 	gfx_v11_0_set_rlc_funcs(adev);
4811 	gfx_v11_0_set_mqd_funcs(adev);
4812 	gfx_v11_0_set_imu_funcs(adev);
4813 
4814 	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
4815 
4816 	return 0;
4817 }
4818 
4819 static int gfx_v11_0_late_init(void *handle)
4820 {
4821 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4822 	int r;
4823 
4824 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4825 	if (r)
4826 		return r;
4827 
4828 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4829 	if (r)
4830 		return r;
4831 
4832 	return 0;
4833 }
4834 
4835 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
4836 {
4837 	uint32_t rlc_cntl;
4838 
4839 	/* the RLC is considered enabled when its F32 core is running */
4840 	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
4841 	return REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32) != 0;
4842 }
4843 
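/*
 * Request RLC safe mode: write the CMD/MESSAGE fields and poll until the
 * RLC acknowledges by clearing the CMD field.
 */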
4844 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
4845 {
4846 	uint32_t data;
4847 	unsigned i;
4848 
4849 	data = RLC_SAFE_MODE__CMD_MASK;
4850 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4851 
4852 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
4853 
4854 	/* wait for RLC_SAFE_MODE */
4855 	for (i = 0; i < adev->usec_timeout; i++) {
4856 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
4857 				   RLC_SAFE_MODE, CMD))
4858 			break;
4859 		udelay(1);
4860 	}
4861 }
4862 
4863 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev)
4864 {
4865 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
4866 }
4867 
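/*
 * The CGTT override helpers below share one pattern: clearing an override
 * bit in RLC_CGTT_MGCG_OVERRIDE enables the corresponding clock gating
 * feature, setting it disables the feature; the register is only written
 * when the value actually changes.
 */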
4868 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
4869 				      bool enable)
4870 {
4871 	uint32_t def, data;
4872 
4873 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
4874 		return;
4875 
4876 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4877 
4878 	if (enable)
4879 		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4880 	else
4881 		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4882 
4883 	if (def != data)
4884 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4885 }
4886 
4887 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
4888 				       bool enable)
4889 {
4890 	uint32_t def, data;
4891 
4892 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4893 		return;
4894 
4895 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4896 
4897 	if (enable)
4898 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4899 	else
4900 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4901 
4902 	if (def != data)
4903 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4904 }
4905 
4906 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
4907 					   bool enable)
4908 {
4909 	uint32_t def, data;
4910 
4911 	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4912 		return;
4913 
4914 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4915 
4916 	if (enable)
4917 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4918 	else
4919 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4920 
4921 	if (def != data)
4922 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4923 }
4924 
4925 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4926 						       bool enable)
4927 {
4928 	uint32_t data, def;
4929 
4930 	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
4931 		return;
4932 
4933 	/* It is disabled by HW by default */
4934 	/* MGCG is disabled by the hardware by default */
4935 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4936 			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4937 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4938 
4939 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4940 				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4941 				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4942 
4943 			if (def != data)
4944 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4945 		}
4946 	} else {
4947 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4948 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4949 
4950 			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4951 				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4952 				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4953 
4954 			if (def != data)
4955 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4956 		}
4957 	}
4958 }
4959 
4960 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4961 						       bool enable)
4962 {
4963 	uint32_t def, data;
4964 
4965 	if (!(adev->cg_flags &
4966 	      (AMD_CG_SUPPORT_GFX_CGCG |
4967 	      AMD_CG_SUPPORT_GFX_CGLS |
4968 	      AMD_CG_SUPPORT_GFX_3D_CGCG |
4969 	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
4970 		return;
4971 
4972 	if (enable) {
4973 		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4974 
4975 		/* unset CGCG override */
4976 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4977 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4978 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4979 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4980 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
4981 		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4982 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4983 
4984 		/* update CGCG override bits */
4985 		if (def != data)
4986 			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4987 
4988 		/* enable cgcg FSM(0x0000363F) */
4989 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4990 
4991 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
4992 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
4993 			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4994 				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4995 		}
4996 
4997 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
4998 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
4999 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5000 				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5001 		}
5002 
5003 		if (def != data)
5004 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5005 
5006 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5007 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5008 
5009 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5010 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
5011 			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5012 				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5013 		}
5014 
5015 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5016 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
5017 			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5018 				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5019 		}
5020 
5021 		if (def != data)
5022 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5023 
5024 		/* set IDLE_POLL_COUNT(0x00900100) */
5025 		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
5026 
5027 		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
5028 		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5029 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5030 
5031 		if (def != data)
5032 			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
5033 
5034 		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5035 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
5036 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
5037 		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
5038 		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
5039 		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
5040 
5041 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5042 		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5043 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5044 
5045 		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5046 		if (adev->sdma.num_instances > 1) {
5047 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5048 			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5049 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5050 		}
5051 	} else {
5052 		/* Program RLC_CGCG_CGLS_CTRL */
5053 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5054 
5055 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5056 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5057 
5058 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5059 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5060 
5061 		if (def != data)
5062 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5063 
5064 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5065 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5066 
5067 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5068 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5069 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5070 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5071 
5072 		if (def != data)
5073 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5074 
5075 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5076 		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5077 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5078 
5079 		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5080 		if (adev->sdma.num_instances > 1) {
5081 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5082 			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5083 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5084 		}
5085 	}
5086 }
5087 
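/*
 * Apply all GFX clock gating features under RLC safe mode; the GUI idle
 * interrupt is only touched when at least one CG feature is supported.
 */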
5088 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5089 					    bool enable)
5090 {
5091 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5092 
5093 	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
5094 
5095 	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
5096 
5097 	gfx_v11_0_update_repeater_fgcg(adev, enable);
5098 
5099 	gfx_v11_0_update_sram_fgcg(adev, enable);
5100 
5101 	gfx_v11_0_update_perf_clk(adev, enable);
5102 
5103 	if (adev->cg_flags &
5104 	    (AMD_CG_SUPPORT_GFX_MGCG |
5105 	     AMD_CG_SUPPORT_GFX_CGLS |
5106 	     AMD_CG_SUPPORT_GFX_CGCG |
5107 	     AMD_CG_SUPPORT_GFX_3D_CGCG |
5108 	     AMD_CG_SUPPORT_GFX_3D_CGLS))
5109 		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
5110 
5111 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5112 
5113 	return 0;
5114 }
5115 
5116 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5117 {
5118 	u32 reg, data;
5119 
5120 	amdgpu_gfx_off_ctrl(adev, false);
5121 
5122 	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5123 	if (amdgpu_sriov_is_pp_one_vf(adev))
5124 		data = RREG32_NO_KIQ(reg);
5125 	else
5126 		data = RREG32(reg);
5127 
5128 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5129 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5130 
5131 	if (amdgpu_sriov_is_pp_one_vf(adev))
5132 		WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5133 	else
5134 		WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5135 
5136 	amdgpu_gfx_off_ctrl(adev, true);
5137 }
5138 
5139 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5140 	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5141 	.set_safe_mode = gfx_v11_0_set_safe_mode,
5142 	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
5143 	.init = gfx_v11_0_rlc_init,
5144 	.get_csb_size = gfx_v11_0_get_csb_size,
5145 	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
5146 	.resume = gfx_v11_0_rlc_resume,
5147 	.stop = gfx_v11_0_rlc_stop,
5148 	.reset = gfx_v11_0_rlc_reset,
5149 	.start = gfx_v11_0_rlc_start,
5150 	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
5151 };
5152 
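/*
 * Toggle GFX power gating in RLC_PG_CNTL; when enabling on GC 11.0.1 also
 * program RLC_PG_DELAY_3 for the CGPG hysteresis.
 */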
5153 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5154 {
5155 	u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5156 
5157 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5158 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5159 	else
5160 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5161 
5162 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5163 
5164 	// Program RLC_PG_DELAY3 for CGPG hysteresis
5165 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5166 		switch (adev->ip_versions[GC_HWIP][0]) {
5167 		case IP_VERSION(11, 0, 1):
5168 			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5169 			break;
5170 		default:
5171 			break;
5172 		}
5173 	}
5174 }
5175 
5176 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5177 {
5178 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5179 
5180 	gfx_v11_cntl_power_gating(adev, enable);
5181 
5182 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5183 }
5184 
5185 static int gfx_v11_0_set_powergating_state(void *handle,
5186 					   enum amd_powergating_state state)
5187 {
5188 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5189 	bool enable = (state == AMD_PG_STATE_GATE);
5190 
5191 	if (amdgpu_sriov_vf(adev))
5192 		return 0;
5193 
5194 	switch (adev->ip_versions[GC_HWIP][0]) {
5195 	case IP_VERSION(11, 0, 0):
5196 	case IP_VERSION(11, 0, 2):
5197 		amdgpu_gfx_off_ctrl(adev, enable);
5198 		break;
5199 	case IP_VERSION(11, 0, 1):
5200 		gfx_v11_cntl_pg(adev, enable);
5201 		amdgpu_gfx_off_ctrl(adev, enable);
5202 		break;
5203 	default:
5204 		break;
5205 	}
5206 
5207 	return 0;
5208 }
5209 
5210 static int gfx_v11_0_set_clockgating_state(void *handle,
5211 					  enum amd_clockgating_state state)
5212 {
5213 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5214 
5215 	if (amdgpu_sriov_vf(adev))
5216 		return 0;
5217 
5218 	switch (adev->ip_versions[GC_HWIP][0]) {
5219 	case IP_VERSION(11, 0, 0):
5220 	case IP_VERSION(11, 0, 1):
5221 	case IP_VERSION(11, 0, 2):
5222 		gfx_v11_0_update_gfx_clock_gating(adev,
5223 						  state == AMD_CG_STATE_GATE);
5224 		break;
5225 	default:
5226 		break;
5227 	}
5228 
5229 	return 0;
5230 }
5231 
5232 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5233 {
5234 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5235 	int data;
5236 
5237 	/* AMD_CG_SUPPORT_GFX_MGCG */
5238 	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5239 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5240 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5241 
5242 	/* AMD_CG_SUPPORT_REPEATER_FGCG */
5243 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5244 		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5245 
5246 	/* AMD_CG_SUPPORT_GFX_FGCG */
5247 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5248 		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
5249 
5250 	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
5251 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5252 		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5253 
5254 	/* AMD_CG_SUPPORT_GFX_CGCG */
5255 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5256 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5257 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5258 
5259 	/* AMD_CG_SUPPORT_GFX_CGLS */
5260 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5261 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5262 
5263 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5264 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5265 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5266 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5267 
5268 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5269 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5270 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5271 }
5272 
5273 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5274 {
5275 	/* gfx11 hardware is 32bit rptr */
5276 	return *(uint32_t *)ring->rptr_cpu_addr;
5277 }
5278 
5279 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5280 {
5281 	struct amdgpu_device *adev = ring->adev;
5282 	u64 wptr;
5283 
5284 	/* XXX check if swapping is necessary on BE */
5285 	if (ring->use_doorbell) {
5286 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5287 	} else {
5288 		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5289 		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5290 	}
5291 
5292 	return wptr;
5293 }
5294 
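/*
 * For MES-managed queues the wptr shadow and the "queue unmapped" flag live
 * directly after the MQD; unmapped queues also ring the aggregated doorbell
 * so the MES picks the submission up. Legacy queues use the per-ring
 * doorbell or the CP_RB0_WPTR registers.
 */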
5295 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5296 {
5297 	struct amdgpu_device *adev = ring->adev;
5298 	uint32_t *wptr_saved;
5299 	uint32_t *is_queue_unmap;
5300 	uint64_t aggregated_db_index;
5301 	uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
5302 	uint64_t wptr_tmp;
5303 
5304 	if (ring->is_mes_queue) {
5305 		wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5306 		is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5307 					      sizeof(uint32_t));
5308 		aggregated_db_index =
5309 			amdgpu_mes_get_aggregated_doorbell_index(adev,
5310 								 ring->hw_prio);
5311 
5312 		wptr_tmp = ring->wptr & ring->buf_mask;
5313 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5314 		*wptr_saved = wptr_tmp;
5315 		/* assume the doorbell is always used by an MES-mapped queue */
5316 		if (*is_queue_unmap) {
5317 			WDOORBELL64(aggregated_db_index, wptr_tmp);
5318 			WDOORBELL64(ring->doorbell_index, wptr_tmp);
5319 		} else {
5320 			WDOORBELL64(ring->doorbell_index, wptr_tmp);
5321 
5322 			if (*is_queue_unmap)
5323 				WDOORBELL64(aggregated_db_index, wptr_tmp);
5324 		}
5325 	} else {
5326 		if (ring->use_doorbell) {
5327 			/* XXX check if swapping is necessary on BE */
5328 			atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5329 				     ring->wptr);
5330 			WDOORBELL64(ring->doorbell_index, ring->wptr);
5331 		} else {
5332 			WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5333 				     lower_32_bits(ring->wptr));
5334 			WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5335 				     upper_32_bits(ring->wptr));
5336 		}
5337 	}
5338 }
5339 
5340 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5341 {
5342 	/* gfx11 hardware is 32bit rptr */
5343 	return *(uint32_t *)ring->rptr_cpu_addr;
5344 }
5345 
5346 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5347 {
5348 	u64 wptr;
5349 
5350 	/* XXX check if swapping is necessary on BE */
5351 	if (ring->use_doorbell)
5352 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5353 	else
5354 		BUG();
5355 	return wptr;
5356 }
5357 
5358 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5359 {
5360 	struct amdgpu_device *adev = ring->adev;
5361 	uint32_t *wptr_saved;
5362 	uint32_t *is_queue_unmap;
5363 	uint64_t aggregated_db_index;
5364 	uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
5365 	uint64_t wptr_tmp;
5366 
5367 	if (ring->is_mes_queue) {
5368 		wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5369 		is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5370 					      sizeof(uint32_t));
5371 		aggregated_db_index =
5372 			amdgpu_mes_get_aggregated_doorbell_index(adev,
5373 								 ring->hw_prio);
5374 
5375 		wptr_tmp = ring->wptr & ring->buf_mask;
5376 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5377 		*wptr_saved = wptr_tmp;
5378 		/* assume the doorbell is always used by an MES-mapped queue */
5379 		if (*is_queue_unmap) {
5380 			WDOORBELL64(aggregated_db_index, wptr_tmp);
5381 			WDOORBELL64(ring->doorbell_index, wptr_tmp);
5382 		} else {
5383 			WDOORBELL64(ring->doorbell_index, wptr_tmp);
5384 
5385 			if (*is_queue_unmap)
5386 				WDOORBELL64(aggregated_db_index, wptr_tmp);
5387 		}
5388 	} else {
5389 		/* XXX check if swapping is necessary on BE */
5390 		if (ring->use_doorbell) {
5391 			atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5392 				     ring->wptr);
5393 			WDOORBELL64(ring->doorbell_index, ring->wptr);
5394 		} else {
5395 			BUG(); /* only DOORBELL method supported on gfx11 now */
5396 		}
5397 	}
5398 }
5399 
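/*
 * Emit a WAIT_REG_MEM that requests an HDP flush by writing the NBIO
 * flush-request register and waiting on the flush-done register; the
 * ref/mask bit is selected from the ring's ME and pipe.
 */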
5400 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5401 {
5402 	struct amdgpu_device *adev = ring->adev;
5403 	u32 ref_and_mask, reg_mem_engine;
5404 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5405 
5406 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5407 		switch (ring->me) {
5408 		case 1:
5409 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5410 			break;
5411 		case 2:
5412 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5413 			break;
5414 		default:
5415 			return;
5416 		}
5417 		reg_mem_engine = 0;
5418 	} else {
5419 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5420 		reg_mem_engine = 1; /* pfp */
5421 	}
5422 
5423 	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5424 			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5425 			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5426 			       ref_and_mask, ref_and_mask, 0x20);
5427 }
5428 
5429 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5430 				       struct amdgpu_job *job,
5431 				       struct amdgpu_ib *ib,
5432 				       uint32_t flags)
5433 {
5434 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5435 	u32 header, control = 0;
5436 
5437 	BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5438 
5439 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5440 
5441 	control |= ib->length_dw | (vmid << 24);
5442 
5443 	if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5444 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5445 
5446 		if (flags & AMDGPU_IB_PREEMPTED)
5447 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5448 
5449 		if (vmid)
5450 			gfx_v11_0_ring_emit_de_meta(ring,
5451 				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
5452 	}
5453 
5454 	if (ring->is_mes_queue)
5455 		/* inherit vmid from mqd */
5456 		control |= 0x400000;
5457 
5458 	amdgpu_ring_write(ring, header);
5459 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5460 	amdgpu_ring_write(ring,
5461 #ifdef __BIG_ENDIAN
5462 		(2 << 0) |
5463 #endif
5464 		lower_32_bits(ib->gpu_addr));
5465 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5466 	amdgpu_ring_write(ring, control);
5467 }
5468 
5469 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5470 					   struct amdgpu_job *job,
5471 					   struct amdgpu_ib *ib,
5472 					   uint32_t flags)
5473 {
5474 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5475 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5476 
5477 	if (ring->is_mes_queue)
5478 		/* inherit vmid from mqd */
5479 		control |= 0x40000000;
5480 
5481 	/* Currently, there is a high possibility to get wave ID mismatch
5482 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5483 	 * different wave IDs than the GDS expects. This situation happens
5484 	 * randomly when at least 5 compute pipes use GDS ordered append.
5485 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5486 	 * Those are probably bugs somewhere else in the kernel driver.
5487 	 *
5488 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5489 	 * GDS to 0 for this ring (me/pipe).
5490 	 */
5491 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5492 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5493 		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5494 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5495 	}
5496 
5497 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5498 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5499 	amdgpu_ring_write(ring,
5500 #ifdef __BIG_ENDIAN
5501 				(2 << 0) |
5502 #endif
5503 				lower_32_bits(ib->gpu_addr));
5504 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5505 	amdgpu_ring_write(ring, control);
5506 }
5507 
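/*
 * Fence via RELEASE_MEM: flush/invalidate the GL2/GL1/GLM caches, write the
 * 32- or 64-bit sequence number to "addr" and optionally raise an interrupt.
 */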
5508 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5509 				     u64 seq, unsigned flags)
5510 {
5511 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5512 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5513 
5514 	/* RELEASE_MEM - flush caches, send int */
5515 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5516 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5517 				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
5518 				 PACKET3_RELEASE_MEM_GCR_GL2_INV |
5519 				 PACKET3_RELEASE_MEM_GCR_GL2_US |
5520 				 PACKET3_RELEASE_MEM_GCR_GL1_INV |
5521 				 PACKET3_RELEASE_MEM_GCR_GLV_INV |
5522 				 PACKET3_RELEASE_MEM_GCR_GLM_INV |
5523 				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
5524 				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5525 				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5526 				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5527 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5528 				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5529 
5530 	/*
5531 	 * the address must be Qword aligned for a 64-bit write, or Dword
5532 	 * aligned when only the low 32 bits are sent (data high is discarded)
5533 	 */
5534 	if (write64bit)
5535 		BUG_ON(addr & 0x7);
5536 	else
5537 		BUG_ON(addr & 0x3);
5538 	amdgpu_ring_write(ring, lower_32_bits(addr));
5539 	amdgpu_ring_write(ring, upper_32_bits(addr));
5540 	amdgpu_ring_write(ring, lower_32_bits(seq));
5541 	amdgpu_ring_write(ring, upper_32_bits(seq));
5542 	amdgpu_ring_write(ring, ring->is_mes_queue ?
5543 			 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5544 }
5545 
5546 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5547 {
5548 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5549 	uint32_t seq = ring->fence_drv.sync_seq;
5550 	uint64_t addr = ring->fence_drv.gpu_addr;
5551 
5552 	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5553 			       upper_32_bits(addr), seq, 0xffffffff, 4);
5554 }
5555 
5556 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5557 				   uint16_t pasid, uint32_t flush_type,
5558 				   bool all_hub, uint8_t dst_sel)
5559 {
5560 	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5561 	amdgpu_ring_write(ring,
5562 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5563 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5564 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5565 			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5566 }
5567 
5568 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5569 					 unsigned vmid, uint64_t pd_addr)
5570 {
5571 	if (ring->is_mes_queue)
5572 		gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5573 	else
5574 		amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5575 
5576 	/* compute doesn't have PFP */
5577 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5578 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5579 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5580 		amdgpu_ring_write(ring, 0x0);
5581 	}
5582 }
5583 
5584 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5585 					  u64 seq, unsigned int flags)
5586 {
5587 	struct amdgpu_device *adev = ring->adev;
5588 
5589 	/* we only allocate 32 bits for each seq writeback address */
5590 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5591 
5592 	/* write fence seq to the "addr" */
5593 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5594 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5595 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5596 	amdgpu_ring_write(ring, lower_32_bits(addr));
5597 	amdgpu_ring_write(ring, upper_32_bits(addr));
5598 	amdgpu_ring_write(ring, lower_32_bits(seq));
5599 
5600 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5601 		/* set register to trigger INT */
5602 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5603 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5604 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5605 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5606 		amdgpu_ring_write(ring, 0);
5607 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5608 	}
5609 }
5610 
5611 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5612 					 uint32_t flags)
5613 {
5614 	uint32_t dw2 = 0;
5615 
5616 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5617 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5618 		/* set load_global_config & load_global_uconfig */
5619 		dw2 |= 0x8001;
5620 		/* set load_cs_sh_regs */
5621 		dw2 |= 0x01000000;
5622 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5623 		dw2 |= 0x10002;
5624 	}
5625 
5626 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5627 	amdgpu_ring_write(ring, dw2);
5628 	amdgpu_ring_write(ring, 0);
5629 }
5630 
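/*
 * COND_EXEC preamble: the packet skips the following DWs when the value at
 * cond_exe_gpu_addr is zero; the DW count is patched in later by
 * gfx_v11_0_ring_emit_patch_cond_exec().
 */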
5631 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5632 {
5633 	unsigned ret;
5634 
5635 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5636 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5637 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5638 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5639 	ret = ring->wptr & ring->buf_mask;
5640 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5641 
5642 	return ret;
5643 }
5644 
5645 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5646 {
5647 	unsigned cur;
5648 	BUG_ON(offset > ring->buf_mask);
5649 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5650 
5651 	cur = (ring->wptr - 1) & ring->buf_mask;
5652 	if (likely(cur > offset))
5653 		ring->ring[offset] = cur - offset;
5654 	else
5655 		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
5656 }
5657 
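/*
 * Mid-command-buffer preemption: ask the KIQ to preempt the queue without
 * unmapping it, then poll the trailing fence to confirm the preemption
 * completed.
 */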
5658 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5659 {
5660 	int i, r = 0;
5661 	struct amdgpu_device *adev = ring->adev;
5662 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5663 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5664 	unsigned long flags;
5665 
5666 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5667 		return -EINVAL;
5668 
5669 	spin_lock_irqsave(&kiq->ring_lock, flags);
5670 
5671 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5672 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5673 		return -ENOMEM;
5674 	}
5675 
5676 	/* assert preemption condition */
5677 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5678 
5679 	/* assert IB preemption, emit the trailing fence */
5680 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5681 				   ring->trail_fence_gpu_addr,
5682 				   ++ring->trail_seq);
5683 	amdgpu_ring_commit(kiq_ring);
5684 
5685 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5686 
5687 	/* poll the trailing fence */
5688 	for (i = 0; i < adev->usec_timeout; i++) {
5689 		if (ring->trail_seq ==
5690 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
5691 			break;
5692 		udelay(1);
5693 	}
5694 
5695 	if (i >= adev->usec_timeout) {
5696 		r = -EINVAL;
5697 		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
5698 	}
5699 
5700 	/* deassert preemption condition */
5701 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5702 	return r;
5703 }
5704 
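/*
 * Emit the DE metadata (including the GDS backup address) needed for
 * preemption; on resume the payload is replayed from the saved CPU copy.
 */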
5705 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5706 {
5707 	struct amdgpu_device *adev = ring->adev;
5708 	struct v10_de_ib_state de_payload = {0};
5709 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5710 	void *de_payload_cpu_addr;
5711 	int cnt;
5712 
5713 	if (ring->is_mes_queue) {
5714 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5715 				  gfx[0].gfx_meta_data) +
5716 			offsetof(struct v10_gfx_meta_data, de_payload);
5717 		de_payload_gpu_addr =
5718 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5719 		de_payload_cpu_addr =
5720 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5721 
5722 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5723 				  gfx[0].gds_backup) +
5724 			offsetof(struct v10_gfx_meta_data, de_payload);
5725 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5726 	} else {
5727 		offset = offsetof(struct v10_gfx_meta_data, de_payload);
5728 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5729 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5730 
5731 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5732 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5733 				 PAGE_SIZE);
5734 	}
5735 
5736 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5737 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5738 
5739 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5740 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5741 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5742 				 WRITE_DATA_DST_SEL(8) |
5743 				 WR_CONFIRM) |
5744 				 WRITE_DATA_CACHE_POLICY(0));
5745 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5746 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5747 
5748 	if (resume)
5749 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5750 					   sizeof(de_payload) >> 2);
5751 	else
5752 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5753 					   sizeof(de_payload) >> 2);
5754 }
5755 
5756 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5757 				    bool secure)
5758 {
5759 	uint32_t v = secure ? FRAME_TMZ : 0;
5760 
5761 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5762 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5763 }
5764 
5765 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5766 				     uint32_t reg_val_offs)
5767 {
5768 	struct amdgpu_device *adev = ring->adev;
5769 
5770 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5771 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5772 				(5 << 8) |	/* dst: memory */
5773 				(1 << 20));	/* write confirm */
5774 	amdgpu_ring_write(ring, reg);
5775 	amdgpu_ring_write(ring, 0);
5776 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5777 				reg_val_offs * 4));
5778 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5779 				reg_val_offs * 4));
5780 }
5781 
5782 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5783 				   uint32_t val)
5784 {
5785 	uint32_t cmd = 0;
5786 
5787 	switch (ring->funcs->type) {
5788 	case AMDGPU_RING_TYPE_GFX:
5789 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5790 		break;
5791 	case AMDGPU_RING_TYPE_KIQ:
5792 		cmd = (1 << 16); /* no inc addr */
5793 		break;
5794 	default:
5795 		cmd = WR_CONFIRM;
5796 		break;
5797 	}
5798 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5799 	amdgpu_ring_write(ring, cmd);
5800 	amdgpu_ring_write(ring, reg);
5801 	amdgpu_ring_write(ring, 0);
5802 	amdgpu_ring_write(ring, val);
5803 }
5804 
5805 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5806 					uint32_t val, uint32_t mask)
5807 {
5808 	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5809 }
5810 
5811 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5812 						   uint32_t reg0, uint32_t reg1,
5813 						   uint32_t ref, uint32_t mask)
5814 {
5815 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5816 
5817 	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5818 			       ref, mask, 0x20);
5819 }
5820 
5821 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
5822 					 unsigned vmid)
5823 {
5824 	struct amdgpu_device *adev = ring->adev;
5825 	uint32_t value = 0;
5826 
5827 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5828 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5829 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5830 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5831 	WREG32_SOC15(GC, 0, regSQ_CMD, value);
5832 }
5833 
5834 static void
5835 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5836 				      uint32_t me, uint32_t pipe,
5837 				      enum amdgpu_interrupt_state state)
5838 {
5839 	uint32_t cp_int_cntl, cp_int_cntl_reg;
5840 
5841 	if (!me) {
5842 		switch (pipe) {
5843 		case 0:
5844 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
5845 			break;
5846 		case 1:
5847 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
5848 			break;
5849 		default:
5850 			DRM_DEBUG("invalid pipe %d\n", pipe);
5851 			return;
5852 		}
5853 	} else {
5854 		DRM_DEBUG("invalid me %d\n", me);
5855 		return;
5856 	}
5857 
5858 	switch (state) {
5859 	case AMDGPU_IRQ_STATE_DISABLE:
5860 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5861 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5862 					    TIME_STAMP_INT_ENABLE, 0);
5863 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5864 					    GENERIC0_INT_ENABLE, 0);
5865 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5866 		break;
5867 	case AMDGPU_IRQ_STATE_ENABLE:
5868 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5869 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5870 					    TIME_STAMP_INT_ENABLE, 1);
5871 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5872 					    GENERIC0_INT_ENABLE, 1);
5873 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5874 		break;
5875 	default:
5876 		break;
5877 	}
5878 }
5879 
5880 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5881 						     int me, int pipe,
5882 						     enum amdgpu_interrupt_state state)
5883 {
5884 	u32 mec_int_cntl, mec_int_cntl_reg;
5885 
5886 	/*
5887 	 * amdgpu controls only the first MEC. That's why this function only
5888 	 * handles the setting of interrupts for this specific MEC. All other
5889 	 * pipes' interrupts are set by amdkfd.
5890 	 */
5891 
5892 	if (me == 1) {
5893 		switch (pipe) {
5894 		case 0:
5895 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5896 			break;
5897 		case 1:
5898 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
5899 			break;
5900 		case 2:
5901 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
5902 			break;
5903 		case 3:
5904 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
5905 			break;
5906 		default:
5907 			DRM_DEBUG("invalid pipe %d\n", pipe);
5908 			return;
5909 		}
5910 	} else {
5911 		DRM_DEBUG("invalid me %d\n", me);
5912 		return;
5913 	}
5914 
5915 	switch (state) {
5916 	case AMDGPU_IRQ_STATE_DISABLE:
5917 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5918 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5919 					     TIME_STAMP_INT_ENABLE, 0);
5920 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5921 					     GENERIC0_INT_ENABLE, 0);
5922 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5923 		break;
5924 	case AMDGPU_IRQ_STATE_ENABLE:
5925 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5926 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5927 					     TIME_STAMP_INT_ENABLE, 1);
5928 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5929 					     GENERIC0_INT_ENABLE, 1);
5930 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5931 		break;
5932 	default:
5933 		break;
5934 	}
5935 }
5936 
5937 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5938 					    struct amdgpu_irq_src *src,
5939 					    unsigned type,
5940 					    enum amdgpu_interrupt_state state)
5941 {
5942 	switch (type) {
5943 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5944 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
5945 		break;
5946 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
5947 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
5948 		break;
5949 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5950 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5951 		break;
5952 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5953 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5954 		break;
5955 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5956 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5957 		break;
5958 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5959 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5960 		break;
5961 	default:
5962 		break;
5963 	}
5964 	return 0;
5965 }
5966 
5967 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
5968 			     struct amdgpu_irq_src *source,
5969 			     struct amdgpu_iv_entry *entry)
5970 {
5971 	int i;
5972 	u8 me_id, pipe_id, queue_id;
5973 	struct amdgpu_ring *ring;
5974 	uint32_t mes_queue_id = entry->src_data[0];
5975 
5976 	DRM_DEBUG("IH: CP EOP\n");
5977 
5978 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
5979 		struct amdgpu_mes_queue *queue;
5980 
5981 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
5982 
5983 		spin_lock(&adev->mes.queue_id_lock);
5984 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
5985 		if (queue) {
5986 			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
5987 			amdgpu_fence_process(queue->ring);
5988 		}
5989 		spin_unlock(&adev->mes.queue_id_lock);
5990 	} else {
5991 		me_id = (entry->ring_id & 0x0c) >> 2;
5992 		pipe_id = (entry->ring_id & 0x03) >> 0;
5993 		queue_id = (entry->ring_id & 0x70) >> 4;
5994 
5995 		switch (me_id) {
5996 		case 0:
5997 			if (pipe_id == 0)
5998 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5999 			else
6000 				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
6001 			break;
6002 		case 1:
6003 		case 2:
6004 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6005 				ring = &adev->gfx.compute_ring[i];
6006 				/* Per-queue interrupt is supported for MEC starting from VI.
6007 				 * The interrupt can only be enabled/disabled per pipe instead
6008 				 * of per queue.
6009 				 */
6010 				if ((ring->me == me_id) &&
6011 				    (ring->pipe == pipe_id) &&
6012 				    (ring->queue == queue_id))
6013 					amdgpu_fence_process(ring);
6014 			}
6015 			break;
6016 		}
6017 	}
6018 
6019 	return 0;
6020 }
6021 
6022 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6023 					      struct amdgpu_irq_src *source,
6024 					      unsigned type,
6025 					      enum amdgpu_interrupt_state state)
6026 {
6027 	switch (state) {
6028 	case AMDGPU_IRQ_STATE_DISABLE:
6029 	case AMDGPU_IRQ_STATE_ENABLE:
6030 		WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
6031 			       PRIV_REG_INT_ENABLE,
6032 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6033 		break;
6034 	default:
6035 		break;
6036 	}
6037 
6038 	return 0;
6039 }
6040 
6041 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6042 					       struct amdgpu_irq_src *source,
6043 					       unsigned type,
6044 					       enum amdgpu_interrupt_state state)
6045 {
6046 	switch (state) {
6047 	case AMDGPU_IRQ_STATE_DISABLE:
6048 	case AMDGPU_IRQ_STATE_ENABLE:
6049 		WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
6050 			       PRIV_INSTR_INT_ENABLE,
6051 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6052 		break;
6053 	default:
6054 		break;
6055 	}
6056 
6057 	return 0;
6058 }
6059 
6060 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
6061 					struct amdgpu_iv_entry *entry)
6062 {
6063 	u8 me_id, pipe_id, queue_id;
6064 	struct amdgpu_ring *ring;
6065 	int i;
6066 
6067 	me_id = (entry->ring_id & 0x0c) >> 2;
6068 	pipe_id = (entry->ring_id & 0x03) >> 0;
6069 	queue_id = (entry->ring_id & 0x70) >> 4;
6070 
6071 	switch (me_id) {
6072 	case 0:
6073 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
6074 			ring = &adev->gfx.gfx_ring[i];
6075 			/* only one gfx queue per pipe is enabled for now */
6076 			if (ring->me == me_id && ring->pipe == pipe_id)
6077 				drm_sched_fault(&ring->sched);
6078 		}
6079 		break;
6080 	case 1:
6081 	case 2:
6082 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6083 			ring = &adev->gfx.compute_ring[i];
6084 			if (ring->me == me_id && ring->pipe == pipe_id &&
6085 			    ring->queue == queue_id)
6086 				drm_sched_fault(&ring->sched);
6087 		}
6088 		break;
6089 	default:
6090 		BUG();
6091 		break;
6092 	}
6093 }
6094 
6095 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
6096 				  struct amdgpu_irq_src *source,
6097 				  struct amdgpu_iv_entry *entry)
6098 {
6099 	DRM_ERROR("Illegal register access in command stream\n");
6100 	gfx_v11_0_handle_priv_fault(adev, entry);
6101 	return 0;
6102 }
6103 
6104 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
6105 				   struct amdgpu_irq_src *source,
6106 				   struct amdgpu_iv_entry *entry)
6107 {
6108 	DRM_ERROR("Illegal instruction in command stream\n");
6109 	gfx_v11_0_handle_priv_fault(adev, entry);
6110 	return 0;
6111 }
6112 
6113 #if 0
6114 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6115 					     struct amdgpu_irq_src *src,
6116 					     unsigned int type,
6117 					     enum amdgpu_interrupt_state state)
6118 {
6119 	uint32_t tmp, target;
6120 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6121 
6122 	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6123 	target += ring->pipe;
6124 
6125 	switch (type) {
6126 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6127 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
6128 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6129 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6130 					    GENERIC2_INT_ENABLE, 0);
6131 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6132 
6133 			tmp = RREG32_SOC15_IP(GC, target);
6134 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6135 					    GENERIC2_INT_ENABLE, 0);
6136 			WREG32_SOC15_IP(GC, target, tmp);
6137 		} else {
6138 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6139 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6140 					    GENERIC2_INT_ENABLE, 1);
6141 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6142 
6143 			tmp = RREG32_SOC15_IP(GC, target);
6144 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6145 					    GENERIC2_INT_ENABLE, 1);
6146 			WREG32_SOC15_IP(GC, target, tmp);
6147 		}
6148 		break;
6149 	default:
6150 		BUG(); /* kiq only support GENERIC2_INT now */
6151 		break;
6152 	}
6153 	return 0;
6154 }
6155 #endif
6156 
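/*
 * ACQUIRE_MEM over the full address range: invalidate/write back the GL2,
 * GLM, GL1, GLV, GLK and GLI caches before subsequent surface accesses.
 */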
6157 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6158 {
6159 	const unsigned int gcr_cntl =
6160 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6161 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6162 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6163 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6164 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6165 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6166 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6167 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6168 
6169 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6170 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6171 	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6172 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6173 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6174 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6175 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6176 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6177 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6178 }
6179 
6180 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6181 	.name = "gfx_v11_0",
6182 	.early_init = gfx_v11_0_early_init,
6183 	.late_init = gfx_v11_0_late_init,
6184 	.sw_init = gfx_v11_0_sw_init,
6185 	.sw_fini = gfx_v11_0_sw_fini,
6186 	.hw_init = gfx_v11_0_hw_init,
6187 	.hw_fini = gfx_v11_0_hw_fini,
6188 	.suspend = gfx_v11_0_suspend,
6189 	.resume = gfx_v11_0_resume,
6190 	.is_idle = gfx_v11_0_is_idle,
6191 	.wait_for_idle = gfx_v11_0_wait_for_idle,
6192 	.soft_reset = gfx_v11_0_soft_reset,
6193 	.check_soft_reset = gfx_v11_0_check_soft_reset,
6194 	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
6195 	.set_powergating_state = gfx_v11_0_set_powergating_state,
6196 	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
6197 };
6198 
6199 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6200 	.type = AMDGPU_RING_TYPE_GFX,
6201 	.align_mask = 0xff,
6202 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6203 	.support_64bit_ptrs = true,
6204 	.vmhub = AMDGPU_GFXHUB_0,
6205 	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6206 	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6207 	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6208 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6209 		5 + /* COND_EXEC */
6210 		7 + /* PIPELINE_SYNC */
6211 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6212 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6213 		2 + /* VM_FLUSH */
6214 		8 + /* FENCE for VM_FLUSH */
6215 		20 + /* GDS switch */
6216 		5 + /* COND_EXEC */
6217 		7 + /* HDP_flush */
6218 		4 + /* VGT_flush */
6219 		31 + /*	DE_META */
6220 		3 + /* CNTX_CTRL */
6221 		5 + /* HDP_INVL */
6222 		8 + 8 + /* FENCE x2 */
6223 		8, /* gfx_v11_0_emit_mem_sync */
6224 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
6225 	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6226 	.emit_fence = gfx_v11_0_ring_emit_fence,
6227 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6228 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6229 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6230 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6231 	.test_ring = gfx_v11_0_ring_test_ring,
6232 	.test_ib = gfx_v11_0_ring_test_ib,
6233 	.insert_nop = amdgpu_ring_insert_nop,
6234 	.pad_ib = amdgpu_ring_generic_pad_ib,
6235 	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6236 	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6237 	.patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
6238 	.preempt_ib = gfx_v11_0_ring_preempt_ib,
6239 	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6240 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6241 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6242 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6243 	.soft_recovery = gfx_v11_0_ring_soft_recovery,
6244 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6245 };
6246 
6247 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6248 	.type = AMDGPU_RING_TYPE_COMPUTE,
6249 	.align_mask = 0xff,
6250 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6251 	.support_64bit_ptrs = true,
6252 	.vmhub = AMDGPU_GFXHUB_0,
6253 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6254 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6255 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6256 	.emit_frame_size =
6257 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6258 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6259 		5 + /* hdp invalidate */
6260 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6261 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6262 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6263 		2 + /* gfx_v11_0_ring_emit_vm_flush */
6264 		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6265 		8, /* gfx_v11_0_emit_mem_sync */
6266 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6267 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6268 	.emit_fence = gfx_v11_0_ring_emit_fence,
6269 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6270 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6271 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6272 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6273 	.test_ring = gfx_v11_0_ring_test_ring,
6274 	.test_ib = gfx_v11_0_ring_test_ib,
6275 	.insert_nop = amdgpu_ring_insert_nop,
6276 	.pad_ib = amdgpu_ring_generic_pad_ib,
6277 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6278 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6279 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6280 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6281 };
6282 
6283 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6284 	.type = AMDGPU_RING_TYPE_KIQ,
6285 	.align_mask = 0xff,
6286 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6287 	.support_64bit_ptrs = true,
6288 	.vmhub = AMDGPU_GFXHUB_0,
6289 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6290 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6291 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6292 	.emit_frame_size =
6293 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6294 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6295 		5 + /*hdp invalidate */
6296 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6297 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6298 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6299 		2 + /* gfx_v11_0_ring_emit_vm_flush */
6300 		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6301 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6302 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6303 	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6304 	.test_ring = gfx_v11_0_ring_test_ring,
6305 	.test_ib = gfx_v11_0_ring_test_ib,
6306 	.insert_nop = amdgpu_ring_insert_nop,
6307 	.pad_ib = amdgpu_ring_generic_pad_ib,
6308 	.emit_rreg = gfx_v11_0_ring_emit_rreg,
6309 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6310 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6311 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6312 };
6313 
6314 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6315 {
6316 	int i;
6317 
6318 	adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6319 
6320 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6321 		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6322 
6323 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6324 		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6325 }
6326 
6327 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6328 	.set = gfx_v11_0_set_eop_interrupt_state,
6329 	.process = gfx_v11_0_eop_irq,
6330 };
6331 
6332 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6333 	.set = gfx_v11_0_set_priv_reg_fault_state,
6334 	.process = gfx_v11_0_priv_reg_irq,
6335 };
6336 
6337 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
6338 	.set = gfx_v11_0_set_priv_inst_fault_state,
6339 	.process = gfx_v11_0_priv_inst_irq,
6340 };
6341 
6342 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
6343 {
6344 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6345 	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
6346 
6347 	adev->gfx.priv_reg_irq.num_types = 1;
6348 	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
6349 
6350 	adev->gfx.priv_inst_irq.num_types = 1;
6351 	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
6352 }
6353 
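/* The IMU runs in MISSION_MODE on APUs and in DEBUG_MODE otherwise. */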
6354 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
6355 {
6356 	if (adev->flags & AMD_IS_APU)
6357 		adev->gfx.imu.mode = MISSION_MODE;
6358 	else
6359 		adev->gfx.imu.mode = DEBUG_MODE;
6360 
6361 	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
6362 }
6363 
6364 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
6365 {
6366 	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
6367 }
6368 
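/*
 * Default GDS partitioning for GFX11: 4 KiB of GDS, 64 GWS entries and
 * 16 OA entries. gds_compute_max_wave_id is total_cu * 32 - 1, i.e. up
 * to 32 waves per CU.
 */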
6369 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
6370 {
6371 	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
6372 			    adev->gfx.config.max_sh_per_se *
6373 			    adev->gfx.config.max_shader_engines;
6374 
6375 	adev->gds.gds_size = 0x1000;
6376 	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
6377 	adev->gds.gws_size = 64;
6378 	adev->gds.oa_size = 16;
6379 }
6380 
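/*
 * Register MQD (memory queue descriptor) sizes and init callbacks for
 * the GFX and compute engines.
 */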
6381 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
6382 {
6383 	/* set gfx eng mqd */
6384 	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
6385 		sizeof(struct v11_gfx_mqd);
6386 	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
6387 		gfx_v11_0_gfx_mqd_init;
6388 	/* set compute eng mqd */
6389 	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
6390 		sizeof(struct v11_compute_mqd);
6391 	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
6392 		gfx_v11_0_compute_mqd_init;
6393 }
6394 
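/*
 * Write a user-requested WGP disable bitmap into GC_USER_SHADER_ARRAY_CONFIG
 * for the SE/SH currently selected via gfx_v11_0_select_se_sh().
 */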
6395 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
6396 							  u32 bitmap)
6397 {
6398 	u32 data;
6399 
6400 	if (!bitmap)
6401 		return;
6402 
6403 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6404 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6405 
6406 	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
6407 }
6408 
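/*
 * Active WGPs for the selected SE/SH: OR the fused (CC_*) and user
 * (GC_USER_*) inactive masks, invert, and trim to the number of WGPs,
 * which is max_cu_per_sh / 2 since each WGP holds two CUs.
 */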
6409 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
6410 {
6411 	u32 data, wgp_bitmask;
6412 	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
6413 	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
6414 
6415 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6416 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6417 
6418 	wgp_bitmask =
6419 		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6420 
6421 	return (~data) & wgp_bitmask;
6422 }
6423 
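/*
 * Expand the per-SH WGP bitmap into a CU bitmap: each WGP bit maps to
 * two adjacent CU bits, e.g. a WGP bitmap of 0b101 yields a CU bitmap
 * of 0b110011.
 */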
6424 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
6425 {
6426 	u32 wgp_idx, wgp_active_bitmap;
6427 	u32 cu_bitmap_per_wgp, cu_active_bitmap;
6428 
6429 	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
6430 	cu_active_bitmap = 0;
6431 
6432 	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
6433 		/* each enabled WGP means 2 CUs are enabled */
6434 		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
6435 		if (wgp_active_bitmap & (1 << wgp_idx))
6436 			cu_active_bitmap |= cu_bitmap_per_wgp;
6437 	}
6438 
6439 	return cu_active_bitmap;
6440 }
6441 
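/*
 * Fill cu_info with the active CU bitmap and count for every SE/SH.
 * Per-SH WGP disable masks parsed by amdgpu_gfx_parse_disable_cu() are
 * applied before the active bitmap is read back.
 */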
6442 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
6443 				 struct amdgpu_cu_info *cu_info)
6444 {
6445 	int i, j, k, counter, active_cu_number = 0;
6446 	u32 mask, bitmap;
6447 	unsigned disable_masks[8 * 2];
6448 
6449 	if (!adev || !cu_info)
6450 		return -EINVAL;
6451 
6452 	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
6453 
6454 	mutex_lock(&adev->grbm_idx_mutex);
6455 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6456 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6457 			mask = 1;
6458 			counter = 0;
6459 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
6460 			if (i < 8 && j < 2)
6461 				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
6462 					adev, disable_masks[i * 2 + j]);
6463 			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
6464 
6465 			/*
6466 			 * GFX11 can support more than 4 SEs, but the bitmap in the
6467 			 * cu_info struct is 4x4 and the ioctl interface struct
6468 			 * drm_amdgpu_info_device must stay stable.
6469 			 * So the last two columns of the bitmap are used to store the
6470 			 * CU mask for SEs 4 to 7; the layout of the bitmap is as below:
6471 			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
6472 			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
6473 			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
6474 			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
6475 			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
6476 			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
6477 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
6478 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
6479 			 */
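			/*
			 * e.g. SE5/SH1: i = 5, j = 1 maps to
			 * bitmap[5 % 4][1 + (5 / 4) * 2] = bitmap[1][3].
			 */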
6480 			cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
6481 
6482 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6483 				if (bitmap & mask)
6484 					counter++;
6485 
6486 				mask <<= 1;
6487 			}
6488 			active_cu_number += counter;
6489 		}
6490 	}
6491 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6492 	mutex_unlock(&adev->grbm_idx_mutex);
6493 
6494 	cu_info->number = active_cu_number;
6495 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6496 
6497 	return 0;
6498 }
6499 
6500 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
6501 {
6502 	.type = AMD_IP_BLOCK_TYPE_GFX,
6503 	.major = 11,
6504 	.minor = 0,
6505 	.rev = 0,
6506 	.funcs = &gfx_v11_0_ip_funcs,
6507 };
6508