1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v11_0.h"
34 #include "soc21.h"
35 #include "nvd.h"
36 
37 #include "gc/gc_11_0_0_offset.h"
38 #include "gc/gc_11_0_0_sh_mask.h"
39 #include "smuio/smuio_13_0_6_offset.h"
40 #include "smuio/smuio_13_0_6_sh_mask.h"
41 #include "navi10_enum.h"
42 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
43 
44 #include "soc15.h"
45 #include "soc15d.h"
46 #include "clearstate_gfx11.h"
47 #include "v11_structs.h"
48 #include "gfx_v11_0.h"
49 #include "nbio_v4_3.h"
50 #include "mes_v11_0.h"
51 
52 #define GFX11_NUM_GFX_RINGS		1
53 #define GFX11_MEC_HPD_SIZE	2048
54 
55 #define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
56 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388
57 
58 #define regCGTT_WD_CLK_CTRL		0x5086
59 #define regCGTT_WD_CLK_CTRL_BASE_IDX	1
60 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1	0x4e7e
61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
62 
63 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
64 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
65 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
68 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
69 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
70 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
71 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
72 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
73 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
74 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
75 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
76 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
78 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
79 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
80 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
81 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
82 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
83 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
84 
85 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
86 {
87 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
88 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
89 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
90 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
91 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
92 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
93 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
94 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
95 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
96 };
97 
98 #define DEFAULT_SH_MEM_CONFIG \
99 	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
100 	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
101 	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
102 
103 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
104 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
105 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
106 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
107 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
108 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
109 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
110 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
111                                  struct amdgpu_cu_info *cu_info);
112 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
113 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
114 				   u32 sh_num, u32 instance);
115 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
116 
117 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
118 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
119 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
120 				     uint32_t val);
121 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
122 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
123 					   uint16_t pasid, uint32_t flush_type,
124 					   bool all_hub, uint8_t dst_sel);
125 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
126 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
127 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
128 				      bool enable);
129 
130 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
131 {
132 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
133 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
134 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
135 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
136 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
137 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
138 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
139 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
140 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
141 }
142 
143 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
144 				 struct amdgpu_ring *ring)
145 {
146 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
147 	uint64_t wptr_addr = ring->wptr_gpu_addr;
148 	uint32_t me = 0, eng_sel = 0;
149 
150 	switch (ring->funcs->type) {
151 	case AMDGPU_RING_TYPE_COMPUTE:
152 		me = 1;
153 		eng_sel = 0;
154 		break;
155 	case AMDGPU_RING_TYPE_GFX:
156 		me = 0;
157 		eng_sel = 4;
158 		break;
159 	case AMDGPU_RING_TYPE_MES:
160 		me = 2;
161 		eng_sel = 5;
162 		break;
163 	default:
164 		WARN_ON(1);
165 	}
166 
167 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
168 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
169 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
170 			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
171 			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
172 			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
173 			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
174 			  PACKET3_MAP_QUEUES_ME((me)) |
175 			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
176 			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
177 			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
178 			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
179 	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
180 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
181 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
182 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
183 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
184 }
185 
186 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
187 				   struct amdgpu_ring *ring,
188 				   enum amdgpu_unmap_queues_action action,
189 				   u64 gpu_addr, u64 seq)
190 {
191 	struct amdgpu_device *adev = kiq_ring->adev;
192 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
193 
194 	if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
195 		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
196 		return;
197 	}
198 
199 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
200 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
201 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
202 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
203 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
204 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
205 	amdgpu_ring_write(kiq_ring,
206 		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
207 
208 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
209 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
210 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
211 		amdgpu_ring_write(kiq_ring, seq);
212 	} else {
213 		amdgpu_ring_write(kiq_ring, 0);
214 		amdgpu_ring_write(kiq_ring, 0);
215 		amdgpu_ring_write(kiq_ring, 0);
216 	}
217 }
218 
219 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
220 				   struct amdgpu_ring *ring,
221 				   u64 addr,
222 				   u64 seq)
223 {
224 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
225 
226 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
227 	amdgpu_ring_write(kiq_ring,
228 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
229 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
230 			  PACKET3_QUERY_STATUS_COMMAND(2));
231 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
232 			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
233 			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
234 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
235 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
236 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
237 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
238 }
239 
240 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
241 				uint16_t pasid, uint32_t flush_type,
242 				bool all_hub)
243 {
244 	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
245 }
246 
247 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
248 	.kiq_set_resources = gfx11_kiq_set_resources,
249 	.kiq_map_queues = gfx11_kiq_map_queues,
250 	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
251 	.kiq_query_status = gfx11_kiq_query_status,
252 	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
253 	.set_resources_size = 8,
254 	.map_queues_size = 7,
255 	.unmap_queues_size = 6,
256 	.query_status_size = 7,
257 	.invalidate_tlbs_size = 2,
258 };
259 
260 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
261 {
262 	adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
263 }
264 
265 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
266 {
267 	switch (adev->ip_versions[GC_HWIP][0]) {
268 	case IP_VERSION(11, 0, 1):
269 	case IP_VERSION(11, 0, 4):
270 		soc15_program_register_sequence(adev,
271 						golden_settings_gc_11_0_1,
272 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
273 		break;
274 	default:
275 		break;
276 	}
277 }
278 
279 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
280 				       bool wc, uint32_t reg, uint32_t val)
281 {
282 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
283 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
284 			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
285 	amdgpu_ring_write(ring, reg);
286 	amdgpu_ring_write(ring, 0);
287 	amdgpu_ring_write(ring, val);
288 }
289 
290 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
291 				  int mem_space, int opt, uint32_t addr0,
292 				  uint32_t addr1, uint32_t ref, uint32_t mask,
293 				  uint32_t inv)
294 {
295 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
296 	amdgpu_ring_write(ring,
297 			  /* memory (1) or register (0) */
298 			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
299 			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
300 			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
301 			   WAIT_REG_MEM_ENGINE(eng_sel)));
302 
303 	if (mem_space)
304 		BUG_ON(addr0 & 0x3); /* Dword align */
305 	amdgpu_ring_write(ring, addr0);
306 	amdgpu_ring_write(ring, addr1);
307 	amdgpu_ring_write(ring, ref);
308 	amdgpu_ring_write(ring, mask);
309 	amdgpu_ring_write(ring, inv); /* poll interval */
310 }
311 
312 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
313 {
314 	struct amdgpu_device *adev = ring->adev;
315 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
316 	uint32_t tmp = 0;
317 	unsigned i;
318 	int r;
319 
320 	WREG32(scratch, 0xCAFEDEAD);
321 	r = amdgpu_ring_alloc(ring, 5);
322 	if (r) {
323 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
324 			  ring->idx, r);
325 		return r;
326 	}
327 
328 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
329 		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
330 	} else {
331 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
332 		amdgpu_ring_write(ring, scratch -
333 				  PACKET3_SET_UCONFIG_REG_START);
334 		amdgpu_ring_write(ring, 0xDEADBEEF);
335 	}
336 	amdgpu_ring_commit(ring);
337 
338 	for (i = 0; i < adev->usec_timeout; i++) {
339 		tmp = RREG32(scratch);
340 		if (tmp == 0xDEADBEEF)
341 			break;
342 		if (amdgpu_emu_mode == 1)
343 			msleep(1);
344 		else
345 			udelay(1);
346 	}
347 
348 	if (i >= adev->usec_timeout)
349 		r = -ETIMEDOUT;
350 	return r;
351 }
352 
353 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
354 {
355 	struct amdgpu_device *adev = ring->adev;
356 	struct amdgpu_ib ib;
357 	struct dma_fence *f = NULL;
358 	unsigned index;
359 	uint64_t gpu_addr;
360 	volatile uint32_t *cpu_ptr;
361 	long r;
362 
363 	/* MES KIQ fw hasn't indirect buffer support for now */
364 	if (adev->enable_mes_kiq &&
365 	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
366 		return 0;
367 
368 	memset(&ib, 0, sizeof(ib));
369 
370 	if (ring->is_mes_queue) {
371 		uint32_t padding, offset;
372 
373 		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
374 		padding = amdgpu_mes_ctx_get_offs(ring,
375 						  AMDGPU_MES_CTX_PADDING_OFFS);
376 
377 		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
378 		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
379 
380 		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
381 		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
382 		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
383 	} else {
384 		r = amdgpu_device_wb_get(adev, &index);
385 		if (r)
386 			return r;
387 
388 		gpu_addr = adev->wb.gpu_addr + (index * 4);
389 		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
390 		cpu_ptr = &adev->wb.wb[index];
391 
392 		r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
393 		if (r) {
394 			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
395 			goto err1;
396 		}
397 	}
398 
399 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
400 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
401 	ib.ptr[2] = lower_32_bits(gpu_addr);
402 	ib.ptr[3] = upper_32_bits(gpu_addr);
403 	ib.ptr[4] = 0xDEADBEEF;
404 	ib.length_dw = 5;
405 
406 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
407 	if (r)
408 		goto err2;
409 
410 	r = dma_fence_wait_timeout(f, false, timeout);
411 	if (r == 0) {
412 		r = -ETIMEDOUT;
413 		goto err2;
414 	} else if (r < 0) {
415 		goto err2;
416 	}
417 
418 	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
419 		r = 0;
420 	else
421 		r = -EINVAL;
422 err2:
423 	if (!ring->is_mes_queue)
424 		amdgpu_ib_free(adev, &ib, NULL);
425 	dma_fence_put(f);
426 err1:
427 	if (!ring->is_mes_queue)
428 		amdgpu_device_wb_free(adev, index);
429 	return r;
430 }
431 
432 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
433 {
434 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
435 	amdgpu_ucode_release(&adev->gfx.me_fw);
436 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
437 	amdgpu_ucode_release(&adev->gfx.mec_fw);
438 
439 	kfree(adev->gfx.rlc.register_list_format);
440 }
441 
442 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
443 {
444 	const struct psp_firmware_header_v1_0 *toc_hdr;
445 	int err = 0;
446 	char fw_name[40];
447 
448 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
449 	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name);
450 	if (err)
451 		goto out;
452 
453 	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
454 	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
455 	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
456 	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
457 	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
458 				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
459 	return 0;
460 out:
461 	amdgpu_ucode_release(&adev->psp.toc_fw);
462 	return err;
463 }
464 
465 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
466 {
467 	char fw_name[40];
468 	char ucode_prefix[30];
469 	int err;
470 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
471 	uint16_t version_major;
472 	uint16_t version_minor;
473 
474 	DRM_DEBUG("\n");
475 
476 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
477 
478 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
479 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
480 	if (err)
481 		goto out;
482 	/* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
483 	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
484 				(union amdgpu_firmware_header *)
485 				adev->gfx.pfp_fw->data, 2, 0);
486 	if (adev->gfx.rs64_enable) {
487 		dev_info(adev->dev, "CP RS64 enable\n");
488 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
489 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
490 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
491 	} else {
492 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
493 	}
494 
495 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
496 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
497 	if (err)
498 		goto out;
499 	if (adev->gfx.rs64_enable) {
500 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
501 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
502 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
503 	} else {
504 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
505 	}
506 
507 	if (!amdgpu_sriov_vf(adev)) {
508 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
509 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
510 		if (err)
511 			goto out;
512 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
513 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
514 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
515 		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
516 		if (err)
517 			goto out;
518 	}
519 
520 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
521 	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
522 	if (err)
523 		goto out;
524 	if (adev->gfx.rs64_enable) {
525 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
526 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
527 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
528 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
529 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
530 	} else {
531 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
532 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
533 	}
534 
535 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
536 		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
537 
538 	/* only one MEC for gfx 11.0.0. */
539 	adev->gfx.mec2_fw = NULL;
540 
541 out:
542 	if (err) {
543 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
544 		amdgpu_ucode_release(&adev->gfx.me_fw);
545 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
546 		amdgpu_ucode_release(&adev->gfx.mec_fw);
547 	}
548 
549 	return err;
550 }
551 
552 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
553 {
554 	u32 count = 0;
555 	const struct cs_section_def *sect = NULL;
556 	const struct cs_extent_def *ext = NULL;
557 
558 	/* begin clear state */
559 	count += 2;
560 	/* context control state */
561 	count += 3;
562 
563 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
564 		for (ext = sect->section; ext->extent != NULL; ++ext) {
565 			if (sect->id == SECT_CONTEXT)
566 				count += 2 + ext->reg_count;
567 			else
568 				return 0;
569 		}
570 	}
571 
572 	/* set PA_SC_TILE_STEERING_OVERRIDE */
573 	count += 3;
574 	/* end clear state */
575 	count += 2;
576 	/* clear state */
577 	count += 2;
578 
579 	return count;
580 }
581 
582 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
583 				    volatile u32 *buffer)
584 {
585 	u32 count = 0, i;
586 	const struct cs_section_def *sect = NULL;
587 	const struct cs_extent_def *ext = NULL;
588 	int ctx_reg_offset;
589 
590 	if (adev->gfx.rlc.cs_data == NULL)
591 		return;
592 	if (buffer == NULL)
593 		return;
594 
595 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
596 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
597 
598 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
599 	buffer[count++] = cpu_to_le32(0x80000000);
600 	buffer[count++] = cpu_to_le32(0x80000000);
601 
602 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
603 		for (ext = sect->section; ext->extent != NULL; ++ext) {
604 			if (sect->id == SECT_CONTEXT) {
605 				buffer[count++] =
606 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
607 				buffer[count++] = cpu_to_le32(ext->reg_index -
608 						PACKET3_SET_CONTEXT_REG_START);
609 				for (i = 0; i < ext->reg_count; i++)
610 					buffer[count++] = cpu_to_le32(ext->extent[i]);
611 			} else {
612 				return;
613 			}
614 		}
615 	}
616 
617 	ctx_reg_offset =
618 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
619 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
620 	buffer[count++] = cpu_to_le32(ctx_reg_offset);
621 	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
622 
623 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
624 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
625 
626 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
627 	buffer[count++] = cpu_to_le32(0);
628 }
629 
630 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
631 {
632 	/* clear state block */
633 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
634 			&adev->gfx.rlc.clear_state_gpu_addr,
635 			(void **)&adev->gfx.rlc.cs_ptr);
636 
637 	/* jump table block */
638 	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
639 			&adev->gfx.rlc.cp_table_gpu_addr,
640 			(void **)&adev->gfx.rlc.cp_table_ptr);
641 }
642 
643 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
644 {
645 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
646 
647 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
648 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
649 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
650 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
651 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
652 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
653 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
654 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
655 	adev->gfx.rlc.rlcg_reg_access_supported = true;
656 }
657 
658 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
659 {
660 	const struct cs_section_def *cs_data;
661 	int r;
662 
663 	adev->gfx.rlc.cs_data = gfx11_cs_data;
664 
665 	cs_data = adev->gfx.rlc.cs_data;
666 
667 	if (cs_data) {
668 		/* init clear state block */
669 		r = amdgpu_gfx_rlc_init_csb(adev);
670 		if (r)
671 			return r;
672 	}
673 
674 	/* init spm vmid with 0xf */
675 	if (adev->gfx.rlc.funcs->update_spm_vmid)
676 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
677 
678 	return 0;
679 }
680 
681 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
682 {
683 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
684 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
685 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
686 }
687 
688 static void gfx_v11_0_me_init(struct amdgpu_device *adev)
689 {
690 	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
691 
692 	amdgpu_gfx_graphics_queue_acquire(adev);
693 }
694 
695 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
696 {
697 	int r;
698 	u32 *hpd;
699 	size_t mec_hpd_size;
700 
701 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
702 
703 	/* take ownership of the relevant compute queues */
704 	amdgpu_gfx_compute_queue_acquire(adev);
705 	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
706 
707 	if (mec_hpd_size) {
708 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
709 					      AMDGPU_GEM_DOMAIN_GTT,
710 					      &adev->gfx.mec.hpd_eop_obj,
711 					      &adev->gfx.mec.hpd_eop_gpu_addr,
712 					      (void **)&hpd);
713 		if (r) {
714 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
715 			gfx_v11_0_mec_fini(adev);
716 			return r;
717 		}
718 
719 		memset(hpd, 0, mec_hpd_size);
720 
721 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
722 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
723 	}
724 
725 	return 0;
726 }
727 
728 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
729 {
730 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
731 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
732 		(address << SQ_IND_INDEX__INDEX__SHIFT));
733 	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
734 }
735 
736 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
737 			   uint32_t thread, uint32_t regno,
738 			   uint32_t num, uint32_t *out)
739 {
740 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
741 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
742 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
743 		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
744 		(SQ_IND_INDEX__AUTO_INCR_MASK));
745 	while (num--)
746 		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
747 }
748 
749 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
750 {
751 	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
752 	 * field when performing a select_se_sh so it should be
753 	 * zero here */
754 	WARN_ON(simd != 0);
755 
756 	/* type 2 wave data */
757 	dst[(*no_fields)++] = 2;
758 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
759 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
760 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
761 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
762 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
763 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
764 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
765 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
766 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
767 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
768 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
769 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
770 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
771 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
772 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
773 }
774 
775 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
776 				     uint32_t wave, uint32_t start,
777 				     uint32_t size, uint32_t *dst)
778 {
779 	WARN_ON(simd != 0);
780 
781 	wave_read_regs(
782 		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
783 		dst);
784 }
785 
786 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
787 				      uint32_t wave, uint32_t thread,
788 				      uint32_t start, uint32_t size,
789 				      uint32_t *dst)
790 {
791 	wave_read_regs(
792 		adev, wave, thread,
793 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
794 }
795 
796 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
797 									  u32 me, u32 pipe, u32 q, u32 vm)
798 {
799 	soc21_grbm_select(adev, me, pipe, q, vm);
800 }
801 
802 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
803 	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
804 	.select_se_sh = &gfx_v11_0_select_se_sh,
805 	.read_wave_data = &gfx_v11_0_read_wave_data,
806 	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
807 	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
808 	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
809 	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
810 };
811 
812 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
813 {
814 
815 	switch (adev->ip_versions[GC_HWIP][0]) {
816 	case IP_VERSION(11, 0, 0):
817 	case IP_VERSION(11, 0, 2):
818 	case IP_VERSION(11, 0, 3):
819 		adev->gfx.config.max_hw_contexts = 8;
820 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
821 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
822 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
823 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
824 		break;
825 	case IP_VERSION(11, 0, 1):
826 	case IP_VERSION(11, 0, 4):
827 		adev->gfx.config.max_hw_contexts = 8;
828 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
829 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
830 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
831 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
832 		break;
833 	default:
834 		BUG();
835 		break;
836 	}
837 
838 	return 0;
839 }
840 
841 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
842 				   int me, int pipe, int queue)
843 {
844 	int r;
845 	struct amdgpu_ring *ring;
846 	unsigned int irq_type;
847 
848 	ring = &adev->gfx.gfx_ring[ring_id];
849 
850 	ring->me = me;
851 	ring->pipe = pipe;
852 	ring->queue = queue;
853 
854 	ring->ring_obj = NULL;
855 	ring->use_doorbell = true;
856 
857 	if (!ring_id)
858 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
859 	else
860 		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
861 	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
862 
863 	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
864 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
865 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
866 	if (r)
867 		return r;
868 	return 0;
869 }
870 
871 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
872 				       int mec, int pipe, int queue)
873 {
874 	int r;
875 	unsigned irq_type;
876 	struct amdgpu_ring *ring;
877 	unsigned int hw_prio;
878 
879 	ring = &adev->gfx.compute_ring[ring_id];
880 
881 	/* mec0 is me1 */
882 	ring->me = mec + 1;
883 	ring->pipe = pipe;
884 	ring->queue = queue;
885 
886 	ring->ring_obj = NULL;
887 	ring->use_doorbell = true;
888 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
889 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
890 				+ (ring_id * GFX11_MEC_HPD_SIZE);
891 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
892 
893 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
894 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
895 		+ ring->pipe;
896 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
897 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
898 	/* type-2 packets are deprecated on MEC, use type-3 instead */
899 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
900 			     hw_prio, NULL);
901 	if (r)
902 		return r;
903 
904 	return 0;
905 }
906 
907 static struct {
908 	SOC21_FIRMWARE_ID	id;
909 	unsigned int		offset;
910 	unsigned int		size;
911 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
912 
913 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
914 {
915 	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
916 
917 	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
918 			(ucode->id < SOC21_FIRMWARE_ID_MAX)) {
919 		rlc_autoload_info[ucode->id].id = ucode->id;
920 		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
921 		rlc_autoload_info[ucode->id].size = ucode->size * 4;
922 
923 		ucode++;
924 	}
925 }
926 
927 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
928 {
929 	uint32_t total_size = 0;
930 	SOC21_FIRMWARE_ID id;
931 
932 	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
933 
934 	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
935 		total_size += rlc_autoload_info[id].size;
936 
937 	/* In case the offset in rlc toc ucode is aligned */
938 	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
939 		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
940 			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
941 
942 	return total_size;
943 }
944 
945 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
946 {
947 	int r;
948 	uint32_t total_size;
949 
950 	total_size = gfx_v11_0_calc_toc_total_size(adev);
951 
952 	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
953 				      AMDGPU_GEM_DOMAIN_VRAM |
954 				      AMDGPU_GEM_DOMAIN_GTT,
955 				      &adev->gfx.rlc.rlc_autoload_bo,
956 				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
957 				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
958 
959 	if (r) {
960 		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
961 		return r;
962 	}
963 
964 	return 0;
965 }
966 
967 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
968 					      SOC21_FIRMWARE_ID id,
969 			    		      const void *fw_data,
970 					      uint32_t fw_size,
971 					      uint32_t *fw_autoload_mask)
972 {
973 	uint32_t toc_offset;
974 	uint32_t toc_fw_size;
975 	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
976 
977 	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
978 		return;
979 
980 	toc_offset = rlc_autoload_info[id].offset;
981 	toc_fw_size = rlc_autoload_info[id].size;
982 
983 	if (fw_size == 0)
984 		fw_size = toc_fw_size;
985 
986 	if (fw_size > toc_fw_size)
987 		fw_size = toc_fw_size;
988 
989 	memcpy(ptr + toc_offset, fw_data, fw_size);
990 
991 	if (fw_size < toc_fw_size)
992 		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
993 
994 	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
995 		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
996 }
997 
998 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
999 							uint32_t *fw_autoload_mask)
1000 {
1001 	void *data;
1002 	uint32_t size;
1003 	uint64_t *toc_ptr;
1004 
1005 	*(uint64_t *)fw_autoload_mask |= 0x1;
1006 
1007 	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1008 
1009 	data = adev->psp.toc.start_addr;
1010 	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1011 
1012 	toc_ptr = (uint64_t *)data + size / 8 - 1;
1013 	*toc_ptr = *(uint64_t *)fw_autoload_mask;
1014 
1015 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1016 					data, size, fw_autoload_mask);
1017 }
1018 
1019 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1020 							uint32_t *fw_autoload_mask)
1021 {
1022 	const __le32 *fw_data;
1023 	uint32_t fw_size;
1024 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1025 	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1026 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1027 	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1028 	uint16_t version_major, version_minor;
1029 
1030 	if (adev->gfx.rs64_enable) {
1031 		/* pfp ucode */
1032 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1033 			adev->gfx.pfp_fw->data;
1034 		/* instruction */
1035 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1036 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1037 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1038 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1039 						fw_data, fw_size, fw_autoload_mask);
1040 		/* data */
1041 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1042 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1043 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1044 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1045 						fw_data, fw_size, fw_autoload_mask);
1046 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1047 						fw_data, fw_size, fw_autoload_mask);
1048 		/* me ucode */
1049 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1050 			adev->gfx.me_fw->data;
1051 		/* instruction */
1052 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1053 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1054 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1055 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1056 						fw_data, fw_size, fw_autoload_mask);
1057 		/* data */
1058 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1059 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1060 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1061 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1062 						fw_data, fw_size, fw_autoload_mask);
1063 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1064 						fw_data, fw_size, fw_autoload_mask);
1065 		/* mec ucode */
1066 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1067 			adev->gfx.mec_fw->data;
1068 		/* instruction */
1069 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1070 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1071 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1072 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1073 						fw_data, fw_size, fw_autoload_mask);
1074 		/* data */
1075 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1076 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1077 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1078 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1079 						fw_data, fw_size, fw_autoload_mask);
1080 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1081 						fw_data, fw_size, fw_autoload_mask);
1082 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1083 						fw_data, fw_size, fw_autoload_mask);
1084 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1085 						fw_data, fw_size, fw_autoload_mask);
1086 	} else {
1087 		/* pfp ucode */
1088 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1089 			adev->gfx.pfp_fw->data;
1090 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1091 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1092 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1093 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1094 						fw_data, fw_size, fw_autoload_mask);
1095 
1096 		/* me ucode */
1097 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1098 			adev->gfx.me_fw->data;
1099 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1100 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1101 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1102 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1103 						fw_data, fw_size, fw_autoload_mask);
1104 
1105 		/* mec ucode */
1106 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1107 			adev->gfx.mec_fw->data;
1108 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1109 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1110 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1111 			cp_hdr->jt_size * 4;
1112 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1113 						fw_data, fw_size, fw_autoload_mask);
1114 	}
1115 
1116 	/* rlc ucode */
1117 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1118 		adev->gfx.rlc_fw->data;
1119 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1120 			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1121 	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1122 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1123 					fw_data, fw_size, fw_autoload_mask);
1124 
1125 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1126 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1127 	if (version_major == 2) {
1128 		if (version_minor >= 2) {
1129 			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1130 
1131 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1132 					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1133 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1134 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1135 					fw_data, fw_size, fw_autoload_mask);
1136 
1137 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1138 					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1139 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1140 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1141 					fw_data, fw_size, fw_autoload_mask);
1142 		}
1143 	}
1144 }
1145 
1146 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1147 							uint32_t *fw_autoload_mask)
1148 {
1149 	const __le32 *fw_data;
1150 	uint32_t fw_size;
1151 	const struct sdma_firmware_header_v2_0 *sdma_hdr;
1152 
1153 	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1154 		adev->sdma.instance[0].fw->data;
1155 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1156 			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1157 	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1158 
1159 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1160 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1161 
1162 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1163 			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1164 	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1165 
1166 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1167 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1168 }
1169 
1170 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1171 							uint32_t *fw_autoload_mask)
1172 {
1173 	const __le32 *fw_data;
1174 	unsigned fw_size;
1175 	const struct mes_firmware_header_v1_0 *mes_hdr;
1176 	int pipe, ucode_id, data_id;
1177 
1178 	for (pipe = 0; pipe < 2; pipe++) {
1179 		if (pipe==0) {
1180 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1181 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1182 		} else {
1183 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1184 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1185 		}
1186 
1187 		mes_hdr = (const struct mes_firmware_header_v1_0 *)
1188 			adev->mes.fw[pipe]->data;
1189 
1190 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1191 				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1192 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1193 
1194 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1195 				ucode_id, fw_data, fw_size, fw_autoload_mask);
1196 
1197 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1198 				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1199 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1200 
1201 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1202 				data_id, fw_data, fw_size, fw_autoload_mask);
1203 	}
1204 }
1205 
1206 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1207 {
1208 	uint32_t rlc_g_offset, rlc_g_size;
1209 	uint64_t gpu_addr;
1210 	uint32_t autoload_fw_id[2];
1211 
1212 	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
1213 
1214 	/* RLC autoload sequence 2: copy ucode */
1215 	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1216 	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1217 	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1218 	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1219 
1220 	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1221 	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1222 	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1223 
1224 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1225 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1226 
1227 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1228 
1229 	/* RLC autoload sequence 3: load IMU fw */
1230 	if (adev->gfx.imu.funcs->load_microcode)
1231 		adev->gfx.imu.funcs->load_microcode(adev);
1232 	/* RLC autoload sequence 4 init IMU fw */
1233 	if (adev->gfx.imu.funcs->setup_imu)
1234 		adev->gfx.imu.funcs->setup_imu(adev);
1235 	if (adev->gfx.imu.funcs->start_imu)
1236 		adev->gfx.imu.funcs->start_imu(adev);
1237 
1238 	/* RLC autoload sequence 5 disable gpa mode */
1239 	gfx_v11_0_disable_gpa_mode(adev);
1240 
1241 	return 0;
1242 }
1243 
1244 static int gfx_v11_0_sw_init(void *handle)
1245 {
1246 	int i, j, k, r, ring_id = 0;
1247 	struct amdgpu_kiq *kiq;
1248 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1249 
1250 	adev->gfxhub.funcs->init(adev);
1251 
1252 	switch (adev->ip_versions[GC_HWIP][0]) {
1253 	case IP_VERSION(11, 0, 0):
1254 	case IP_VERSION(11, 0, 1):
1255 	case IP_VERSION(11, 0, 2):
1256 	case IP_VERSION(11, 0, 3):
1257 	case IP_VERSION(11, 0, 4):
1258 		adev->gfx.me.num_me = 1;
1259 		adev->gfx.me.num_pipe_per_me = 1;
1260 		adev->gfx.me.num_queue_per_pipe = 1;
1261 		adev->gfx.mec.num_mec = 2;
1262 		adev->gfx.mec.num_pipe_per_mec = 4;
1263 		adev->gfx.mec.num_queue_per_pipe = 4;
1264 		break;
1265 	default:
1266 		adev->gfx.me.num_me = 1;
1267 		adev->gfx.me.num_pipe_per_me = 1;
1268 		adev->gfx.me.num_queue_per_pipe = 1;
1269 		adev->gfx.mec.num_mec = 1;
1270 		adev->gfx.mec.num_pipe_per_mec = 4;
1271 		adev->gfx.mec.num_queue_per_pipe = 8;
1272 		break;
1273 	}
1274 
1275 	/* EOP Event */
1276 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1277 			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1278 			      &adev->gfx.eop_irq);
1279 	if (r)
1280 		return r;
1281 
1282 	/* Privileged reg */
1283 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1284 			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1285 			      &adev->gfx.priv_reg_irq);
1286 	if (r)
1287 		return r;
1288 
1289 	/* Privileged inst */
1290 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1291 			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1292 			      &adev->gfx.priv_inst_irq);
1293 	if (r)
1294 		return r;
1295 
1296 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1297 
1298 	if (adev->gfx.imu.funcs) {
1299 		if (adev->gfx.imu.funcs->init_microcode) {
1300 			r = adev->gfx.imu.funcs->init_microcode(adev);
1301 			if (r)
1302 				DRM_ERROR("Failed to load imu firmware!\n");
1303 		}
1304 	}
1305 
1306 	gfx_v11_0_me_init(adev);
1307 
1308 	r = gfx_v11_0_rlc_init(adev);
1309 	if (r) {
1310 		DRM_ERROR("Failed to init rlc BOs!\n");
1311 		return r;
1312 	}
1313 
1314 	r = gfx_v11_0_mec_init(adev);
1315 	if (r) {
1316 		DRM_ERROR("Failed to init MEC BOs!\n");
1317 		return r;
1318 	}
1319 
1320 	/* set up the gfx ring */
1321 	for (i = 0; i < adev->gfx.me.num_me; i++) {
1322 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1323 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1324 				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1325 					continue;
1326 
1327 				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1328 							    i, k, j);
1329 				if (r)
1330 					return r;
1331 				ring_id++;
1332 			}
1333 		}
1334 	}
1335 
1336 	ring_id = 0;
1337 	/* set up the compute queues - allocate horizontally across pipes */
1338 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1339 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1340 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1341 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
1342 								     j))
1343 					continue;
1344 
1345 				r = gfx_v11_0_compute_ring_init(adev, ring_id,
1346 								i, k, j);
1347 				if (r)
1348 					return r;
1349 
1350 				ring_id++;
1351 			}
1352 		}
1353 	}
1354 
1355 	if (!adev->enable_mes_kiq) {
1356 		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
1357 		if (r) {
1358 			DRM_ERROR("Failed to init KIQ BOs!\n");
1359 			return r;
1360 		}
1361 
1362 		kiq = &adev->gfx.kiq;
1363 		r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1364 		if (r)
1365 			return r;
1366 	}
1367 
1368 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
1369 	if (r)
1370 		return r;
1371 
1372 	/* allocate visible FB for rlc auto-loading fw */
1373 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1374 		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1375 		if (r)
1376 			return r;
1377 	}
1378 
1379 	r = gfx_v11_0_gpu_early_init(adev);
1380 	if (r)
1381 		return r;
1382 
1383 	return 0;
1384 }
1385 
1386 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1387 {
1388 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1389 			      &adev->gfx.pfp.pfp_fw_gpu_addr,
1390 			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
1391 
1392 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1393 			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1394 			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1395 }
1396 
1397 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1398 {
1399 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1400 			      &adev->gfx.me.me_fw_gpu_addr,
1401 			      (void **)&adev->gfx.me.me_fw_ptr);
1402 
1403 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1404 			       &adev->gfx.me.me_fw_data_gpu_addr,
1405 			       (void **)&adev->gfx.me.me_fw_data_ptr);
1406 }
1407 
1408 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1409 {
1410 	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1411 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
1412 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
1413 }
1414 
1415 static int gfx_v11_0_sw_fini(void *handle)
1416 {
1417 	int i;
1418 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1419 
1420 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1421 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1422 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1423 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1424 
1425 	amdgpu_gfx_mqd_sw_fini(adev);
1426 
1427 	if (!adev->enable_mes_kiq) {
1428 		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
1429 		amdgpu_gfx_kiq_fini(adev);
1430 	}
1431 
1432 	gfx_v11_0_pfp_fini(adev);
1433 	gfx_v11_0_me_fini(adev);
1434 	gfx_v11_0_rlc_fini(adev);
1435 	gfx_v11_0_mec_fini(adev);
1436 
1437 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1438 		gfx_v11_0_rlc_autoload_buffer_fini(adev);
1439 
1440 	gfx_v11_0_free_microcode(adev);
1441 
1442 	return 0;
1443 }
1444 
1445 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1446 				   u32 sh_num, u32 instance)
1447 {
1448 	u32 data;
1449 
1450 	if (instance == 0xffffffff)
1451 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1452 				     INSTANCE_BROADCAST_WRITES, 1);
1453 	else
1454 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1455 				     instance);
1456 
1457 	if (se_num == 0xffffffff)
1458 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1459 				     1);
1460 	else
1461 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1462 
1463 	if (sh_num == 0xffffffff)
1464 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1465 				     1);
1466 	else
1467 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1468 
1469 	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1470 }
1471 
1472 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1473 {
1474 	u32 data, mask;
1475 
1476 	data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1477 	data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1478 
1479 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1480 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1481 
1482 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1483 					 adev->gfx.config.max_sh_per_se);
1484 
1485 	return (~data) & mask;
1486 }
1487 
1488 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1489 {
1490 	int i, j;
1491 	u32 data;
1492 	u32 active_rbs = 0;
1493 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1494 					adev->gfx.config.max_sh_per_se;
1495 
1496 	mutex_lock(&adev->grbm_idx_mutex);
1497 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1498 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1499 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
1500 			data = gfx_v11_0_get_rb_active_bitmap(adev);
1501 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1502 					       rb_bitmap_width_per_sh);
1503 		}
1504 	}
1505 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1506 	mutex_unlock(&adev->grbm_idx_mutex);
1507 
1508 	adev->gfx.config.backend_enable_mask = active_rbs;
1509 	adev->gfx.config.num_rbs = hweight32(active_rbs);
1510 }
1511 
1512 #define DEFAULT_SH_MEM_BASES	(0x6000)
1513 #define LDS_APP_BASE           0x1
1514 #define SCRATCH_APP_BASE       0x2
1515 
1516 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1517 {
1518 	int i;
1519 	uint32_t sh_mem_bases;
1520 	uint32_t data;
1521 
1522 	/*
1523 	 * Configure apertures:
1524 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1525 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1526 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1527 	 */
1528 	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1529 			SCRATCH_APP_BASE;
1530 
1531 	mutex_lock(&adev->srbm_mutex);
1532 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1533 		soc21_grbm_select(adev, 0, 0, 0, i);
1534 		/* CP and shaders */
1535 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1536 		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1537 
1538 		/* Enable trap for each kfd vmid. */
1539 		data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1540 		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1541 	}
1542 	soc21_grbm_select(adev, 0, 0, 0, 0);
1543 	mutex_unlock(&adev->srbm_mutex);
1544 
1545 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
1546 	   acccess. These should be enabled by FW for target VMIDs. */
1547 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1548 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1549 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1550 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1551 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1552 	}
1553 }
1554 
1555 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1556 {
1557 	int vmid;
1558 
1559 	/*
1560 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1561 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
1562 	 * the driver can enable them for graphics. VMID0 should maintain
1563 	 * access so that HWS firmware can save/restore entries.
1564 	 */
1565 	for (vmid = 1; vmid < 16; vmid++) {
1566 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1567 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1568 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1569 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1570 	}
1571 }
1572 
1573 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1574 {
1575 	/* TODO: harvest feature to be added later. */
1576 }
1577 
1578 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1579 {
1580 	/* TCCs are global (not instanced). */
1581 	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1582 			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1583 
1584 	adev->gfx.config.tcc_disabled_mask =
1585 		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
1586 		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
1587 }
1588 
1589 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
1590 {
1591 	u32 tmp;
1592 	int i;
1593 
1594 	if (!amdgpu_sriov_vf(adev))
1595 		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1596 
1597 	gfx_v11_0_setup_rb(adev);
1598 	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1599 	gfx_v11_0_get_tcc_info(adev);
1600 	adev->gfx.config.pa_sc_tile_steering_override = 0;
1601 
1602 	/* XXX SH_MEM regs */
1603 	/* where to put LDS, scratch, GPUVM in FSA64 space */
1604 	mutex_lock(&adev->srbm_mutex);
1605 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
1606 		soc21_grbm_select(adev, 0, 0, 0, i);
1607 		/* CP and shaders */
1608 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1609 		if (i != 0) {
1610 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1611 				(adev->gmc.private_aperture_start >> 48));
1612 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1613 				(adev->gmc.shared_aperture_start >> 48));
1614 			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1615 		}
1616 	}
1617 	soc21_grbm_select(adev, 0, 0, 0, 0);
1618 
1619 	mutex_unlock(&adev->srbm_mutex);
1620 
1621 	gfx_v11_0_init_compute_vmid(adev);
1622 	gfx_v11_0_init_gds_vmid(adev);
1623 }
1624 
1625 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1626 					       bool enable)
1627 {
1628 	u32 tmp;
1629 
1630 	if (amdgpu_sriov_vf(adev))
1631 		return;
1632 
1633 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1634 
1635 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1636 			    enable ? 1 : 0);
1637 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1638 			    enable ? 1 : 0);
1639 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1640 			    enable ? 1 : 0);
1641 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1642 			    enable ? 1 : 0);
1643 
1644 	WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1645 }
1646 
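/*
 * Build the clear state buffer and program its GPU address and size into
 * the RLC_CSIB registers.
 */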
1647 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
1648 {
1649 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1650 
1651 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1652 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
1653 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1654 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1655 	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1656 
1657 	return 0;
1658 }
1659 
1660 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
1661 {
1662 	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1663 
1664 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1665 	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1666 }
1667 
1668 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
1669 {
1670 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1671 	udelay(50);
1672 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1673 	udelay(50);
1674 }
1675 
1676 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1677 					     bool enable)
1678 {
1679 	uint32_t rlc_pg_cntl;
1680 
1681 	rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1682 
1683 	if (!enable) {
1684 		/* RLC_PG_CNTL[23] = 0 (default)
1685 		 * RLC will wait for handshake acks with SMU
1686 		 * GFXOFF will be enabled
1687 		 * RLC_PG_CNTL[23] = 1
1688 		 * RLC will not issue any message to SMU
1689 		 * hence no handshake between SMU & RLC
1690 		 * GFXOFF will be disabled
1691 		 */
1692 		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
	} else {
		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
	}
1695 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1696 }
1697 
1698 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
1699 {
	/* TODO: enable rlc & smu handshake until smu
	 * and gfxoff feature work as expected */
1702 	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1703 		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
1704 
1705 	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1706 	udelay(50);
1707 }
1708 
1709 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
1710 {
1711 	uint32_t tmp;
1712 
1713 	/* enable Save Restore Machine */
1714 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1715 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1716 	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1717 	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1718 }
1719 
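/*
 * Legacy (direct) RLCG load: stream the ucode words into
 * RLC_GPM_UCODE_DATA starting at RLCG_UCODE_LOADING_START_ADDRESS.
 */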
1720 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
1721 {
1722 	const struct rlc_firmware_header_v2_0 *hdr;
1723 	const __le32 *fw_data;
1724 	unsigned i, fw_size;
1725 
1726 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1727 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1728 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1729 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1730 
1731 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1732 		     RLCG_UCODE_LOADING_START_ADDRESS);
1733 
1734 	for (i = 0; i < fw_size; i++)
1735 		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1736 			     le32_to_cpup(fw_data++));
1737 
1738 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1739 }
1740 
1741 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1742 {
1743 	const struct rlc_firmware_header_v2_2 *hdr;
1744 	const __le32 *fw_data;
1745 	unsigned i, fw_size;
1746 	u32 tmp;
1747 
1748 	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1749 
1750 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1751 			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1752 	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1753 
1754 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1755 
1756 	for (i = 0; i < fw_size; i++) {
1757 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1758 			msleep(1);
1759 		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1760 				le32_to_cpup(fw_data++));
1761 	}
1762 
1763 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1764 
1765 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1766 			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1767 	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1768 
1769 	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1770 	for (i = 0; i < fw_size; i++) {
1771 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1772 			msleep(1);
1773 		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1774 				le32_to_cpup(fw_data++));
1775 	}
1776 
1777 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1778 
1779 	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1780 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1781 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1782 	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1783 }
1784 
1785 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
1786 {
1787 	const struct rlc_firmware_header_v2_3 *hdr;
1788 	const __le32 *fw_data;
1789 	unsigned i, fw_size;
1790 	u32 tmp;
1791 
1792 	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
1793 
1794 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1795 			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
1796 	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
1797 
1798 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
1799 
1800 	for (i = 0; i < fw_size; i++) {
1801 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1802 			msleep(1);
1803 		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
1804 				le32_to_cpup(fw_data++));
1805 	}
1806 
1807 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
1808 
1809 	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
1810 	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1811 	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
1812 
1813 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1814 			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
1815 	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
1816 
1817 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
1818 
1819 	for (i = 0; i < fw_size; i++) {
1820 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1821 			msleep(1);
1822 		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
1823 				le32_to_cpup(fw_data++));
1824 	}
1825 
1826 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
1827 
1828 	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
1829 	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
1830 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
1831 }
1832 
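/*
 * Parse the RLC firmware header and load the RLCG image; for v2.2+
 * headers (with dpm enabled) also load the LX6 IRAM/DRAM images, and for
 * v2.3 headers the RLCP/RLCV images.
 */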
1833 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
1834 {
1835 	const struct rlc_firmware_header_v2_0 *hdr;
1836 	uint16_t version_major;
1837 	uint16_t version_minor;
1838 
1839 	if (!adev->gfx.rlc_fw)
1840 		return -EINVAL;
1841 
1842 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1843 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
1844 
1845 	version_major = le16_to_cpu(hdr->header.header_version_major);
1846 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
1847 
1848 	if (version_major == 2) {
1849 		gfx_v11_0_load_rlcg_microcode(adev);
1850 		if (amdgpu_dpm == 1) {
1851 			if (version_minor >= 2)
1852 				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
1853 			if (version_minor == 3)
1854 				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
1855 		}
1856 
1857 		return 0;
1858 	}
1859 
1860 	return -EINVAL;
1861 }
1862 
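/*
 * Bring up the RLC. With PSP front-door loading only the CSB (and, on
 * bare metal, SRM) needs to be set up; otherwise stop the RLC, disable
 * CG/PG, optionally load the firmware through the legacy path, re-init
 * the CSB and start the RLC again.
 */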
1863 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
1864 {
1865 	int r;
1866 
1867 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1868 		gfx_v11_0_init_csb(adev);
1869 
1870 		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1871 			gfx_v11_0_rlc_enable_srm(adev);
1872 	} else {
1873 		if (amdgpu_sriov_vf(adev)) {
1874 			gfx_v11_0_init_csb(adev);
1875 			return 0;
1876 		}
1877 
1878 		adev->gfx.rlc.funcs->stop(adev);
1879 
1880 		/* disable CG */
1881 		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
1882 
1883 		/* disable PG */
1884 		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
1885 
1886 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1887 			/* legacy rlc firmware loading */
1888 			r = gfx_v11_0_rlc_load_microcode(adev);
1889 			if (r)
1890 				return r;
1891 		}
1892 
1893 		gfx_v11_0_init_csb(adev);
1894 
1895 		adev->gfx.rlc.funcs->start(adev);
1896 	}
1897 	return 0;
1898 }
1899 
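/*
 * Invalidate the ME L1 instruction cache and point CP_ME_IC_BASE_LO/HI at
 * the ME ucode in GPU memory.
 */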
1900 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
1901 {
1902 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
1903 	uint32_t tmp;
1904 	int i;
1905 
1906 	/* Trigger an invalidation of the L1 instruction caches */
1907 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
1908 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1909 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
1910 
1911 	/* Wait for invalidation complete */
1912 	for (i = 0; i < usec_timeout; i++) {
1913 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
1914 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
1915 					INVALIDATE_CACHE_COMPLETE))
1916 			break;
1917 		udelay(1);
1918 	}
1919 
1920 	if (i >= usec_timeout) {
1921 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
1922 		return -EINVAL;
1923 	}
1924 
1925 	if (amdgpu_emu_mode == 1)
1926 		adev->hdp.funcs->flush_hdp(adev, NULL);
1927 
1928 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
1929 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
1930 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
1931 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
1932 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
1933 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
1934 
	/* Program me ucode address into instruction cache address register */
1936 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
1937 			lower_32_bits(addr) & 0xFFFFF000);
1938 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
1939 			upper_32_bits(addr));
1940 
1941 	return 0;
1942 }
1943 
1944 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
1945 {
1946 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
1947 	uint32_t tmp;
1948 	int i;
1949 
1950 	/* Trigger an invalidation of the L1 instruction caches */
1951 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
1952 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1953 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
1954 
1955 	/* Wait for invalidation complete */
1956 	for (i = 0; i < usec_timeout; i++) {
1957 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
1958 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
1959 					INVALIDATE_CACHE_COMPLETE))
1960 			break;
1961 		udelay(1);
1962 	}
1963 
1964 	if (i >= usec_timeout) {
1965 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
1966 		return -EINVAL;
1967 	}
1968 
1969 	if (amdgpu_emu_mode == 1)
1970 		adev->hdp.funcs->flush_hdp(adev, NULL);
1971 
1972 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
1973 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
1974 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
1975 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
1976 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
1977 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
1978 
	/* Program pfp ucode address into instruction cache address register */
1980 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
1981 			lower_32_bits(addr) & 0xFFFFF000);
1982 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
1983 			upper_32_bits(addr));
1984 
1985 	return 0;
1986 }
1987 
1988 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
1989 {
1990 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
1991 	uint32_t tmp;
1992 	int i;
1993 
1994 	/* Trigger an invalidation of the L1 instruction caches */
1995 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
1996 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1997 
1998 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
1999 
2000 	/* Wait for invalidation complete */
2001 	for (i = 0; i < usec_timeout; i++) {
2002 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2003 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2004 					INVALIDATE_CACHE_COMPLETE))
2005 			break;
2006 		udelay(1);
2007 	}
2008 
2009 	if (i >= usec_timeout) {
2010 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2011 		return -EINVAL;
2012 	}
2013 
2014 	if (amdgpu_emu_mode == 1)
2015 		adev->hdp.funcs->flush_hdp(adev, NULL);
2016 
2017 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2018 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2019 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2020 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2021 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2022 
	/* Program mec1 ucode address into instruction cache address register */
2024 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2025 			lower_32_bits(addr) & 0xFFFFF000);
2026 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2027 			upper_32_bits(addr));
2028 
2029 	return 0;
2030 }
2031 
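/*
 * RS64 PFP setup: program the instruction cache base, wait for the
 * implied invalidation, prime the I$, then per pipe program the start
 * address and data cache base while pulsing the pipe reset, and finally
 * invalidate the RS64 data cache.
 */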
2032 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2033 {
2034 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2035 	uint32_t tmp;
2036 	unsigned i, pipe_id;
2037 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2038 
2039 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2040 		adev->gfx.pfp_fw->data;
2041 
2042 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2043 		lower_32_bits(addr));
2044 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2045 		upper_32_bits(addr));
2046 
2047 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2048 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2049 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2050 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2051 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2052 
	/*
	 * Programming any of the CP_PFP_IC_BASE registers
	 * forces invalidation of the PFP L1 I$. Wait for the
	 * invalidation to complete.
	 */
2058 	for (i = 0; i < usec_timeout; i++) {
2059 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2060 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2061 			INVALIDATE_CACHE_COMPLETE))
2062 			break;
2063 		udelay(1);
2064 	}
2065 
2066 	if (i >= usec_timeout) {
2067 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2068 		return -EINVAL;
2069 	}
2070 
2071 	/* Prime the L1 instruction caches */
2072 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2073 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2074 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
	/* Wait for the cache to be primed */
2076 	for (i = 0; i < usec_timeout; i++) {
2077 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2078 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2079 			ICACHE_PRIMED))
2080 			break;
2081 		udelay(1);
2082 	}
2083 
2084 	if (i >= usec_timeout) {
2085 		dev_err(adev->dev, "failed to prime instruction cache\n");
2086 		return -EINVAL;
2087 	}
2088 
2089 	mutex_lock(&adev->srbm_mutex);
2090 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2091 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2092 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2093 			(pfp_hdr->ucode_start_addr_hi << 30) |
2094 			(pfp_hdr->ucode_start_addr_lo >> 2));
2095 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2096 			pfp_hdr->ucode_start_addr_hi >> 2);
2097 
		/*
		 * Program CP_ME_CNTL to reset the given PIPE so that
		 * CP_PFP_PRGRM_CNTR_START takes effect.
		 */
2102 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2103 		if (pipe_id == 0)
2104 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2105 					PFP_PIPE0_RESET, 1);
2106 		else
2107 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2108 					PFP_PIPE1_RESET, 1);
2109 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2110 
		/* Clear the pfp pipe reset bit. */
2112 		if (pipe_id == 0)
2113 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2114 					PFP_PIPE0_RESET, 0);
2115 		else
2116 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2117 					PFP_PIPE1_RESET, 0);
2118 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2119 
2120 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2121 			lower_32_bits(addr2));
2122 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2123 			upper_32_bits(addr2));
2124 	}
2125 	soc21_grbm_select(adev, 0, 0, 0, 0);
2126 	mutex_unlock(&adev->srbm_mutex);
2127 
2128 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2129 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2130 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2131 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2132 
2133 	/* Invalidate the data caches */
2134 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2135 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2136 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2137 
2138 	for (i = 0; i < usec_timeout; i++) {
2139 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2140 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2141 			INVALIDATE_DCACHE_COMPLETE))
2142 			break;
2143 		udelay(1);
2144 	}
2145 
2146 	if (i >= usec_timeout) {
2147 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2148 		return -EINVAL;
2149 	}
2150 
2151 	return 0;
2152 }
2153 
2154 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2155 {
2156 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2157 	uint32_t tmp;
2158 	unsigned i, pipe_id;
2159 	const struct gfx_firmware_header_v2_0 *me_hdr;
2160 
2161 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2162 		adev->gfx.me_fw->data;
2163 
2164 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2165 		lower_32_bits(addr));
2166 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2167 		upper_32_bits(addr));
2168 
2169 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2170 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2171 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2172 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2173 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2174 
	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
2180 	for (i = 0; i < usec_timeout; i++) {
2181 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2182 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2183 			INVALIDATE_CACHE_COMPLETE))
2184 			break;
2185 		udelay(1);
2186 	}
2187 
2188 	if (i >= usec_timeout) {
2189 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2190 		return -EINVAL;
2191 	}
2192 
2193 	/* Prime the instruction caches */
2194 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2195 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2196 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2197 
	/* Wait for the instruction cache to be primed */
2199 	for (i = 0; i < usec_timeout; i++) {
2200 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2201 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2202 			ICACHE_PRIMED))
2203 			break;
2204 		udelay(1);
2205 	}
2206 
2207 	if (i >= usec_timeout) {
2208 		dev_err(adev->dev, "failed to prime instruction cache\n");
2209 		return -EINVAL;
2210 	}
2211 
2212 	mutex_lock(&adev->srbm_mutex);
2213 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2214 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi >> 2);
2220 
		/*
		 * Program CP_ME_CNTL to reset the given PIPE so that
		 * CP_ME_PRGRM_CNTR_START takes effect.
		 */
2225 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2226 		if (pipe_id == 0)
2227 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2228 					ME_PIPE0_RESET, 1);
2229 		else
2230 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2231 					ME_PIPE1_RESET, 1);
2232 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2233 
		/* Clear the me pipe reset bit. */
2235 		if (pipe_id == 0)
2236 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2237 					ME_PIPE0_RESET, 0);
2238 		else
2239 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2240 					ME_PIPE1_RESET, 0);
2241 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2242 
2243 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2244 			lower_32_bits(addr2));
2245 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2246 			upper_32_bits(addr2));
2247 	}
2248 	soc21_grbm_select(adev, 0, 0, 0, 0);
2249 	mutex_unlock(&adev->srbm_mutex);
2250 
2251 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2252 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2253 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2254 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2255 
2256 	/* Invalidate the data caches */
2257 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2258 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2259 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2260 
2261 	for (i = 0; i < usec_timeout; i++) {
2262 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2263 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2264 			INVALIDATE_DCACHE_COMPLETE))
2265 			break;
2266 		udelay(1);
2267 	}
2268 
2269 	if (i >= usec_timeout) {
2270 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2271 		return -EINVAL;
2272 	}
2273 
2274 	return 0;
2275 }
2276 
2277 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2278 {
2279 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2280 	uint32_t tmp;
2281 	unsigned i;
2282 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2283 
2284 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2285 		adev->gfx.mec_fw->data;
2286 
2287 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2288 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2289 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2290 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2291 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2292 
2293 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2294 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2295 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2296 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2297 
2298 	mutex_lock(&adev->srbm_mutex);
2299 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2300 		soc21_grbm_select(adev, 1, i, 0, 0);
2301 
2302 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
2303 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2304 		     upper_32_bits(addr2));
2305 
2306 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2307 					mec_hdr->ucode_start_addr_lo >> 2 |
2308 					mec_hdr->ucode_start_addr_hi << 30);
2309 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2310 					mec_hdr->ucode_start_addr_hi >> 2);
2311 
2312 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
2313 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2314 		     upper_32_bits(addr));
2315 	}
2316 	mutex_unlock(&adev->srbm_mutex);
2317 	soc21_grbm_select(adev, 0, 0, 0, 0);
2318 
	/* Trigger an invalidation of the MEC data cache */
2320 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2321 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2322 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2323 
2324 	/* Wait for invalidation complete */
2325 	for (i = 0; i < usec_timeout; i++) {
2326 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2327 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2328 				       INVALIDATE_DCACHE_COMPLETE))
2329 			break;
2330 		udelay(1);
2331 	}
2332 
2333 	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2335 		return -EINVAL;
2336 	}
2337 
2338 	/* Trigger an invalidation of the L1 instruction caches */
2339 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2340 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2341 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2342 
2343 	/* Wait for invalidation complete */
2344 	for (i = 0; i < usec_timeout; i++) {
2345 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2346 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2347 				       INVALIDATE_CACHE_COMPLETE))
2348 			break;
2349 		udelay(1);
2350 	}
2351 
2352 	if (i >= usec_timeout) {
2353 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2354 		return -EINVAL;
2355 	}
2356 
2357 	return 0;
2358 }
2359 
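/*
 * Program the RS64 PFP/ME/MEC program counter start addresses for every
 * pipe and pulse the corresponding pipe reset bits so the new start
 * addresses take effect.
 */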
2360 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2361 {
2362 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2363 	const struct gfx_firmware_header_v2_0 *me_hdr;
2364 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2365 	uint32_t pipe_id, tmp;
2366 
2367 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2368 		adev->gfx.mec_fw->data;
2369 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2370 		adev->gfx.me_fw->data;
2371 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2372 		adev->gfx.pfp_fw->data;
2373 
2374 	/* config pfp program start addr */
2375 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2376 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2377 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2378 			(pfp_hdr->ucode_start_addr_hi << 30) |
2379 			(pfp_hdr->ucode_start_addr_lo >> 2));
2380 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2381 			pfp_hdr->ucode_start_addr_hi >> 2);
2382 	}
2383 	soc21_grbm_select(adev, 0, 0, 0, 0);
2384 
2385 	/* reset pfp pipe */
2386 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2387 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2388 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2389 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2390 
2391 	/* clear pfp pipe reset */
2392 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2393 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2394 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2395 
2396 	/* config me program start addr */
2397 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2398 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi >> 2);
2404 	}
2405 	soc21_grbm_select(adev, 0, 0, 0, 0);
2406 
2407 	/* reset me pipe */
2408 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2409 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2410 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2411 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2412 
2413 	/* clear me pipe reset */
2414 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2415 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2416 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2417 
2418 	/* config mec program start addr */
2419 	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2420 		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2421 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2422 					mec_hdr->ucode_start_addr_lo >> 2 |
2423 					mec_hdr->ucode_start_addr_hi << 30);
2424 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2425 					mec_hdr->ucode_start_addr_hi >> 2);
2426 	}
2427 	soc21_grbm_select(adev, 0, 0, 0, 0);
2428 
2429 	/* reset mec pipe */
2430 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2431 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2432 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2433 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2434 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2435 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2436 
2437 	/* clear mec pipe reset */
2438 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2439 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2440 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2441 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2442 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2443 }
2444 
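/*
 * Poll CP_STAT and RLC_RLCS_BOOTLOAD_STATUS until the RLC reports
 * bootload complete; for backdoor autoload, also point the ME/PFP/MEC
 * caches at the firmware images staged in the RLC autoload buffer.
 */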
2445 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2446 {
2447 	uint32_t cp_status;
2448 	uint32_t bootload_status;
2449 	int i, r;
2450 	uint64_t addr, addr2;
2451 
2452 	for (i = 0; i < adev->usec_timeout; i++) {
2453 		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2454 
2455 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
2456 				adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
2457 			bootload_status = RREG32_SOC15(GC, 0,
2458 					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2459 		else
2460 			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2461 
2462 		if ((cp_status == 0) &&
2463 		    (REG_GET_FIELD(bootload_status,
2464 			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2465 			break;
2466 		}
2467 		udelay(1);
2468 	}
2469 
2470 	if (i >= adev->usec_timeout) {
2471 		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2472 		return -ETIMEDOUT;
2473 	}
2474 
2475 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2476 		if (adev->gfx.rs64_enable) {
2477 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2478 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2479 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2480 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2481 			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2482 			if (r)
2483 				return r;
2484 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2485 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2486 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2487 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2488 			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2489 			if (r)
2490 				return r;
2491 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2492 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2493 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2494 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2495 			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2496 			if (r)
2497 				return r;
2498 		} else {
2499 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2500 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2501 			r = gfx_v11_0_config_me_cache(adev, addr);
2502 			if (r)
2503 				return r;
2504 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2505 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2506 			r = gfx_v11_0_config_pfp_cache(adev, addr);
2507 			if (r)
2508 				return r;
2509 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2510 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2511 			r = gfx_v11_0_config_mec_cache(adev, addr);
2512 			if (r)
2513 				return r;
2514 		}
2515 	}
2516 
2517 	return 0;
2518 }
2519 
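/*
 * Halt or un-halt the gfx CP (PFP and ME) and wait for CP_STAT to read
 * back zero.
 */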
2520 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2521 {
2522 	int i;
2523 	u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2524 
2525 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2526 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2527 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2528 
2529 	for (i = 0; i < adev->usec_timeout; i++) {
2530 		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2531 			break;
2532 		udelay(1);
2533 	}
2534 
2535 	if (i >= adev->usec_timeout)
2536 		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2537 
2538 	return 0;
2539 }
2540 
2541 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2542 {
2543 	int r;
2544 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2545 	const __le32 *fw_data;
2546 	unsigned i, fw_size;
2547 
2548 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2549 		adev->gfx.pfp_fw->data;
2550 
2551 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2552 
2553 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2554 		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2555 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2556 
2557 	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2558 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2559 				      &adev->gfx.pfp.pfp_fw_obj,
2560 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
2561 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
2562 	if (r) {
2563 		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2564 		gfx_v11_0_pfp_fini(adev);
2565 		return r;
2566 	}
2567 
2568 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2569 
2570 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2571 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2572 
2573 	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2574 
2575 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
2576 
2577 	for (i = 0; i < pfp_hdr->jt_size; i++)
2578 		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
2579 			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
2580 
2581 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2582 
2583 	return 0;
2584 }
2585 
2586 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2587 {
2588 	int r;
2589 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2590 	const __le32 *fw_ucode, *fw_data;
2591 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2592 	uint32_t tmp;
2593 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2594 
2595 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2596 		adev->gfx.pfp_fw->data;
2597 
2598 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2599 
2600 	/* instruction */
2601 	fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2602 		le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2603 	fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2604 	/* data */
2605 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2606 		le32_to_cpu(pfp_hdr->data_offset_bytes));
2607 	fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2608 
	/* 64KB alignment */
2610 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2611 				      64 * 1024,
2612 				      AMDGPU_GEM_DOMAIN_VRAM |
2613 				      AMDGPU_GEM_DOMAIN_GTT,
2614 				      &adev->gfx.pfp.pfp_fw_obj,
2615 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
2616 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
2617 	if (r) {
2618 		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2619 		gfx_v11_0_pfp_fini(adev);
2620 		return r;
2621 	}
2622 
2623 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
2624 				      64 * 1024,
2625 				      AMDGPU_GEM_DOMAIN_VRAM |
2626 				      AMDGPU_GEM_DOMAIN_GTT,
2627 				      &adev->gfx.pfp.pfp_fw_data_obj,
2628 				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2629 				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2630 	if (r) {
2631 		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2632 		gfx_v11_0_pfp_fini(adev);
2633 		return r;
2634 	}
2635 
2636 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2637 	memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2638 
2639 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2640 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2641 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2642 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2643 
2644 	if (amdgpu_emu_mode == 1)
2645 		adev->hdp.funcs->flush_hdp(adev, NULL);
2646 
2647 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2648 		lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2649 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2650 		upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2651 
2652 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2653 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2654 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2655 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2656 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2657 
	/*
	 * Programming any of the CP_PFP_IC_BASE registers
	 * forces invalidation of the PFP L1 I$. Wait for the
	 * invalidation to complete.
	 */
2663 	for (i = 0; i < usec_timeout; i++) {
2664 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2665 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2666 			INVALIDATE_CACHE_COMPLETE))
2667 			break;
2668 		udelay(1);
2669 	}
2670 
2671 	if (i >= usec_timeout) {
2672 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2673 		return -EINVAL;
2674 	}
2675 
2676 	/* Prime the L1 instruction caches */
2677 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2678 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2679 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
	/* Wait for the cache to be primed */
2681 	for (i = 0; i < usec_timeout; i++) {
2682 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2683 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2684 			ICACHE_PRIMED))
2685 			break;
2686 		udelay(1);
2687 	}
2688 
2689 	if (i >= usec_timeout) {
2690 		dev_err(adev->dev, "failed to prime instruction cache\n");
2691 		return -EINVAL;
2692 	}
2693 
2694 	mutex_lock(&adev->srbm_mutex);
2695 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2696 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
			(pfp_hdr->ucode_start_addr_hi << 30) |
			(pfp_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
			pfp_hdr->ucode_start_addr_hi >> 2);
2702 
		/*
		 * Program CP_ME_CNTL to reset the given PIPE so that
		 * CP_PFP_PRGRM_CNTR_START takes effect.
		 */
2707 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2708 		if (pipe_id == 0)
2709 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2710 					PFP_PIPE0_RESET, 1);
2711 		else
2712 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2713 					PFP_PIPE1_RESET, 1);
2714 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2715 
		/* Clear the pfp pipe reset bit. */
2717 		if (pipe_id == 0)
2718 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2719 					PFP_PIPE0_RESET, 0);
2720 		else
2721 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2722 					PFP_PIPE1_RESET, 0);
2723 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2724 
2725 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2726 			lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2727 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2728 			upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2729 	}
2730 	soc21_grbm_select(adev, 0, 0, 0, 0);
2731 	mutex_unlock(&adev->srbm_mutex);
2732 
2733 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2734 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2735 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2736 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2737 
2738 	/* Invalidate the data caches */
2739 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2740 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2741 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2742 
2743 	for (i = 0; i < usec_timeout; i++) {
2744 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2745 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2746 			INVALIDATE_DCACHE_COMPLETE))
2747 			break;
2748 		udelay(1);
2749 	}
2750 
2751 	if (i >= usec_timeout) {
2752 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2753 		return -EINVAL;
2754 	}
2755 
2756 	return 0;
2757 }
2758 
2759 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
2760 {
2761 	int r;
2762 	const struct gfx_firmware_header_v1_0 *me_hdr;
2763 	const __le32 *fw_data;
2764 	unsigned i, fw_size;
2765 
2766 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2767 		adev->gfx.me_fw->data;
2768 
2769 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2770 
2771 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2772 		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2773 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
2774 
2775 	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
2776 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2777 				      &adev->gfx.me.me_fw_obj,
2778 				      &adev->gfx.me.me_fw_gpu_addr,
2779 				      (void **)&adev->gfx.me.me_fw_ptr);
2780 	if (r) {
2781 		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2782 		gfx_v11_0_me_fini(adev);
2783 		return r;
2784 	}
2785 
2786 	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
2787 
2788 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2789 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2790 
2791 	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
2792 
2793 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
2794 
2795 	for (i = 0; i < me_hdr->jt_size; i++)
2796 		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
2797 			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));
2798 
2799 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
2800 
2801 	return 0;
2802 }
2803 
2804 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2805 {
2806 	int r;
2807 	const struct gfx_firmware_header_v2_0 *me_hdr;
2808 	const __le32 *fw_ucode, *fw_data;
2809 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2810 	uint32_t tmp;
2811 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2812 
2813 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2814 		adev->gfx.me_fw->data;
2815 
2816 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2817 
2818 	/* instruction */
2819 	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2820 		le32_to_cpu(me_hdr->ucode_offset_bytes));
2821 	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2822 	/* data */
2823 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2824 		le32_to_cpu(me_hdr->data_offset_bytes));
2825 	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2826 
	/* 64KB alignment */
2828 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2829 				      64 * 1024,
2830 				      AMDGPU_GEM_DOMAIN_VRAM |
2831 				      AMDGPU_GEM_DOMAIN_GTT,
2832 				      &adev->gfx.me.me_fw_obj,
2833 				      &adev->gfx.me.me_fw_gpu_addr,
2834 				      (void **)&adev->gfx.me.me_fw_ptr);
2835 	if (r) {
2836 		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2837 		gfx_v11_0_me_fini(adev);
2838 		return r;
2839 	}
2840 
2841 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
2842 				      64 * 1024,
2843 				      AMDGPU_GEM_DOMAIN_VRAM |
2844 				      AMDGPU_GEM_DOMAIN_GTT,
2845 				      &adev->gfx.me.me_fw_data_obj,
2846 				      &adev->gfx.me.me_fw_data_gpu_addr,
2847 				      (void **)&adev->gfx.me.me_fw_data_ptr);
2848 	if (r) {
2849 		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
		gfx_v11_0_me_fini(adev);
2851 		return r;
2852 	}
2853 
2854 	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
2855 	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
2856 
2857 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2858 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
2859 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2860 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
2861 
2862 	if (amdgpu_emu_mode == 1)
2863 		adev->hdp.funcs->flush_hdp(adev, NULL);
2864 
2865 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2866 		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
2867 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2868 		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
2869 
2870 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2871 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2872 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2873 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2874 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2875 
	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
2881 	for (i = 0; i < usec_timeout; i++) {
2882 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2883 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2884 			INVALIDATE_CACHE_COMPLETE))
2885 			break;
2886 		udelay(1);
2887 	}
2888 
2889 	if (i >= usec_timeout) {
2890 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2891 		return -EINVAL;
2892 	}
2893 
2894 	/* Prime the instruction caches */
2895 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2896 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2897 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2898 
	/* Wait for the instruction cache to be primed */
2900 	for (i = 0; i < usec_timeout; i++) {
2901 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2902 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2903 			ICACHE_PRIMED))
2904 			break;
2905 		udelay(1);
2906 	}
2907 
2908 	if (i >= usec_timeout) {
2909 		dev_err(adev->dev, "failed to prime instruction cache\n");
2910 		return -EINVAL;
2911 	}
2912 
2913 	mutex_lock(&adev->srbm_mutex);
2914 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2915 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi >> 2);
2921 
		/*
		 * Program CP_ME_CNTL to reset the given PIPE so that
		 * CP_ME_PRGRM_CNTR_START takes effect.
		 */
2926 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2927 		if (pipe_id == 0)
2928 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2929 					ME_PIPE0_RESET, 1);
2930 		else
2931 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2932 					ME_PIPE1_RESET, 1);
2933 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2934 
		/* Clear the me pipe reset bit. */
2936 		if (pipe_id == 0)
2937 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2938 					ME_PIPE0_RESET, 0);
2939 		else
2940 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2941 					ME_PIPE1_RESET, 0);
2942 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2943 
2944 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2945 			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2946 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2947 			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2948 	}
2949 	soc21_grbm_select(adev, 0, 0, 0, 0);
2950 	mutex_unlock(&adev->srbm_mutex);
2951 
2952 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2953 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2954 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2955 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2956 
2957 	/* Invalidate the data caches */
2958 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2959 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2960 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2961 
2962 	for (i = 0; i < usec_timeout; i++) {
2963 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2964 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2965 			INVALIDATE_DCACHE_COMPLETE))
2966 			break;
2967 		udelay(1);
2968 	}
2969 
2970 	if (i >= usec_timeout) {
2971 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2972 		return -EINVAL;
2973 	}
2974 
2975 	return 0;
2976 }
2977 
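/*
 * Halt the gfx CP and load the PFP and ME firmware, using either the RS64
 * or the legacy path depending on adev->gfx.rs64_enable.
 */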
2978 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2979 {
2980 	int r;
2981 
2982 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
2983 		return -EINVAL;
2984 
2985 	gfx_v11_0_cp_gfx_enable(adev, false);
2986 
2987 	if (adev->gfx.rs64_enable)
2988 		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
2989 	else
2990 		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
2991 	if (r) {
2992 		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
2993 		return r;
2994 	}
2995 
2996 	if (adev->gfx.rs64_enable)
2997 		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
2998 	else
2999 		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3000 	if (r) {
3001 		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3002 		return r;
3003 	}
3004 
3005 	return 0;
3006 }
3007 
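/*
 * Initialize the CP (max contexts, device id) and emit the clear state
 * preamble on gfx ring 0; when a second gfx ring is present, emit a
 * CLEAR_STATE packet on it as well.
 */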
3008 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3009 {
3010 	struct amdgpu_ring *ring;
3011 	const struct cs_section_def *sect = NULL;
3012 	const struct cs_extent_def *ext = NULL;
3013 	int r, i;
3014 	int ctx_reg_offset;
3015 
3016 	/* init the CP */
3017 	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3018 		     adev->gfx.config.max_hw_contexts - 1);
3019 	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3020 
3021 	if (!amdgpu_async_gfx_ring)
3022 		gfx_v11_0_cp_gfx_enable(adev, true);
3023 
3024 	ring = &adev->gfx.gfx_ring[0];
3025 	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3026 	if (r) {
3027 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3028 		return r;
3029 	}
3030 
3031 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3032 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3033 
3034 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3035 	amdgpu_ring_write(ring, 0x80000000);
3036 	amdgpu_ring_write(ring, 0x80000000);
3037 
3038 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3039 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3040 			if (sect->id == SECT_CONTEXT) {
3041 				amdgpu_ring_write(ring,
3042 						  PACKET3(PACKET3_SET_CONTEXT_REG,
3043 							  ext->reg_count));
3044 				amdgpu_ring_write(ring, ext->reg_index -
3045 						  PACKET3_SET_CONTEXT_REG_START);
3046 				for (i = 0; i < ext->reg_count; i++)
3047 					amdgpu_ring_write(ring, ext->extent[i]);
3048 			}
3049 		}
3050 	}
3051 
3052 	ctx_reg_offset =
3053 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3054 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3055 	amdgpu_ring_write(ring, ctx_reg_offset);
3056 	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3057 
3058 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3059 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3060 
3061 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3062 	amdgpu_ring_write(ring, 0);
3063 
3064 	amdgpu_ring_commit(ring);
3065 
3066 	/* submit cs packet to copy state 0 to next available state */
3067 	if (adev->gfx.num_gfx_rings > 1) {
3068 		/* maximum supported gfx ring is 2 */
3069 		ring = &adev->gfx.gfx_ring[1];
3070 		r = amdgpu_ring_alloc(ring, 2);
3071 		if (r) {
3072 			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3073 			return r;
3074 		}
3075 
3076 		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3077 		amdgpu_ring_write(ring, 0);
3078 
3079 		amdgpu_ring_commit(ring);
3080 	}
3081 	return 0;
3082 }
3083 
3084 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3085 					 CP_PIPE_ID pipe)
3086 {
3087 	u32 tmp;
3088 
3089 	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3090 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3091 
3092 	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3093 }
3094 
3095 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3096 					  struct amdgpu_ring *ring)
3097 {
3098 	u32 tmp;
3099 
3100 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3101 	if (ring->use_doorbell) {
3102 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3103 				    DOORBELL_OFFSET, ring->doorbell_index);
3104 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3105 				    DOORBELL_EN, 1);
3106 	} else {
3107 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3108 				    DOORBELL_EN, 0);
3109 	}
3110 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3111 
3112 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3113 			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
3114 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3115 
3116 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3117 		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3118 }
3119 
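/*
 * Program the gfx ring buffers (size, rptr/wptr addresses, base and
 * doorbell) for ring 0 and, when present, ring 1, then start the CP and
 * mark the gfx rings as ready.
 */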
3120 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3121 {
3122 	struct amdgpu_ring *ring;
3123 	u32 tmp;
3124 	u32 rb_bufsz;
3125 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3126 	u32 i;
3127 
3128 	/* Set the write pointer delay */
3129 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3130 
3131 	/* set the RB to use vmid 0 */
3132 	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3133 
3134 	/* Init gfx ring 0 for pipe 0 */
3135 	mutex_lock(&adev->srbm_mutex);
3136 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3137 
3138 	/* Set ring buffer size */
3139 	ring = &adev->gfx.gfx_ring[0];
3140 	rb_bufsz = order_base_2(ring->ring_size / 8);
3141 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3142 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3143 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3144 
3145 	/* Initialize the ring buffer's write pointers */
3146 	ring->wptr = 0;
3147 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3148 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3149 
3150 	/* set the wb address whether it's enabled or not */
3151 	rptr_addr = ring->rptr_gpu_addr;
3152 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3153 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3154 		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3155 
3156 	wptr_gpu_addr = ring->wptr_gpu_addr;
3157 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3158 		     lower_32_bits(wptr_gpu_addr));
3159 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3160 		     upper_32_bits(wptr_gpu_addr));
3161 
3162 	mdelay(1);
3163 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3164 
3165 	rb_addr = ring->gpu_addr >> 8;
3166 	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3167 	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3168 
3169 	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3170 
3171 	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3172 	mutex_unlock(&adev->srbm_mutex);
3173 
3174 	/* Init gfx ring 1 for pipe 1 */
3175 	if (adev->gfx.num_gfx_rings > 1) {
3176 		mutex_lock(&adev->srbm_mutex);
3177 		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3178 		/* maximum supported gfx ring is 2 */
3179 		ring = &adev->gfx.gfx_ring[1];
3180 		rb_bufsz = order_base_2(ring->ring_size / 8);
3181 		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3182 		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3183 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3184 		/* Initialize the ring buffer's write pointers */
3185 		ring->wptr = 0;
3186 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3187 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3188 		/* Set the wb address whether it's enabled or not */
3189 		rptr_addr = ring->rptr_gpu_addr;
3190 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3191 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3192 			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3193 		wptr_gpu_addr = ring->wptr_gpu_addr;
3194 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3195 			     lower_32_bits(wptr_gpu_addr));
3196 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3197 			     upper_32_bits(wptr_gpu_addr));
3198 
3199 		mdelay(1);
3200 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3201 
3202 		rb_addr = ring->gpu_addr >> 8;
3203 		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3204 		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3205 		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3206 
3207 		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3208 		mutex_unlock(&adev->srbm_mutex);
3209 	}
3210 	/* Switch to pipe 0 */
3211 	mutex_lock(&adev->srbm_mutex);
3212 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3213 	mutex_unlock(&adev->srbm_mutex);
3214 
3215 	/* start the ring */
3216 	gfx_v11_0_cp_gfx_start(adev);
3217 
3218 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3219 		ring = &adev->gfx.gfx_ring[i];
3220 		ring->sched.ready = true;
3221 	}
3222 
3223 	return 0;
3224 }
3225 
3226 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3227 {
3228 	u32 data;
3229 
3230 	if (adev->gfx.rs64_enable) {
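		/* RS64 MEC: on enable, clear the icache-invalidate and per-pipe
		 * reset bits, mark all four pipes active and release MEC_HALT;
		 * on disable, do the inverse.
		 */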
3231 		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3232 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3233 							 enable ? 0 : 1);
3234 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3235 							 enable ? 0 : 1);
3236 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3237 							 enable ? 0 : 1);
3238 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3239 							 enable ? 0 : 1);
3240 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3241 							 enable ? 0 : 1);
3242 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3243 							 enable ? 1 : 0);
3244 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3245 							 enable ? 1 : 0);
3246 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3247 							 enable ? 1 : 0);
3248 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3249 							 enable ? 1 : 0);
3250 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3251 							 enable ? 0 : 1);
3252 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3253 	} else {
3254 		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3255 
3256 		if (enable) {
3257 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3258 			if (!adev->enable_mes_kiq)
3259 				data = REG_SET_FIELD(data, CP_MEC_CNTL,
3260 						     MEC_ME2_HALT, 0);
3261 		} else {
3262 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3263 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3264 		}
3265 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3266 	}
3267 
3268 	adev->gfx.kiq.ring.sched.ready = enable;
3269 
3270 	udelay(50);
3271 }
3272 
3273 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3274 {
3275 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3276 	const __le32 *fw_data;
3277 	unsigned i, fw_size;
3278 	u32 *fw = NULL;
3279 	int r;
3280 
3281 	if (!adev->gfx.mec_fw)
3282 		return -EINVAL;
3283 
3284 	gfx_v11_0_cp_compute_enable(adev, false);
3285 
3286 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3287 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3288 
3289 	fw_data = (const __le32 *)
3290 		(adev->gfx.mec_fw->data +
3291 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3292 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3293 
3294 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3295 					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3296 					  &adev->gfx.mec.mec_fw_obj,
3297 					  &adev->gfx.mec.mec_fw_gpu_addr,
3298 					  (void **)&fw);
3299 	if (r) {
3300 		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3301 		gfx_v11_0_mec_fini(adev);
3302 		return r;
3303 	}
3304 
3305 	memcpy(fw, fw_data, fw_size);
3306 
3307 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3308 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3309 
3310 	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3311 
3312 	/* MEC1 */
3313 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3314 
3315 	for (i = 0; i < mec_hdr->jt_size; i++)
3316 		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3317 			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3318 
3319 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3320 
3321 	return 0;
3322 }
3323 
3324 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3325 {
3326 	const struct gfx_firmware_header_v2_0 *mec_hdr;
3327 	const __le32 *fw_ucode, *fw_data;
3328 	u32 tmp, fw_ucode_size, fw_data_size;
3329 	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3330 	u32 *fw_ucode_ptr, *fw_data_ptr;
3331 	int r;
3332 
3333 	if (!adev->gfx.mec_fw)
3334 		return -EINVAL;
3335 
3336 	gfx_v11_0_cp_compute_enable(adev, false);
3337 
3338 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3339 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3340 
3341 	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3342 				le32_to_cpu(mec_hdr->ucode_offset_bytes));
3343 	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3344 
3345 	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3346 				le32_to_cpu(mec_hdr->data_offset_bytes));
3347 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3348 
3349 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3350 				      64 * 1024,
3351 				      AMDGPU_GEM_DOMAIN_VRAM |
3352 				      AMDGPU_GEM_DOMAIN_GTT,
3353 				      &adev->gfx.mec.mec_fw_obj,
3354 				      &adev->gfx.mec.mec_fw_gpu_addr,
3355 				      (void **)&fw_ucode_ptr);
3356 	if (r) {
3357 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3358 		gfx_v11_0_mec_fini(adev);
3359 		return r;
3360 	}
3361 
3362 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3363 				      64 * 1024,
3364 				      AMDGPU_GEM_DOMAIN_VRAM |
3365 				      AMDGPU_GEM_DOMAIN_GTT,
3366 				      &adev->gfx.mec.mec_fw_data_obj,
3367 				      &adev->gfx.mec.mec_fw_data_gpu_addr,
3368 				      (void **)&fw_data_ptr);
3369 	if (r) {
3370 		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3371 		gfx_v11_0_mec_fini(adev);
3372 		return r;
3373 	}
3374 
3375 	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3376 	memcpy(fw_data_ptr, fw_data, fw_data_size);
3377 
3378 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3379 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3380 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3381 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3382 
3383 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3384 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3385 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3386 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3387 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3388 
3389 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3390 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3391 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3392 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3393 
3394 	mutex_lock(&adev->srbm_mutex);
3395 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3396 		soc21_grbm_select(adev, 1, i, 0, 0);
3397 
3398 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3399 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3400 		     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3401 
3402 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3403 					mec_hdr->ucode_start_addr_lo >> 2 |
3404 					mec_hdr->ucode_start_addr_hi << 30);
3405 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3406 					mec_hdr->ucode_start_addr_hi >> 2);
3407 
3408 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3409 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3410 		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3411 	}
3412 	soc21_grbm_select(adev, 0, 0, 0, 0);
3413 	mutex_unlock(&adev->srbm_mutex);
3414 
3415 	/* Trigger an invalidation of the L1 data cache */
3416 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3417 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3418 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3419 
3420 	/* Wait for invalidation complete */
3421 	for (i = 0; i < usec_timeout; i++) {
3422 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3423 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3424 				       INVALIDATE_DCACHE_COMPLETE))
3425 			break;
3426 		udelay(1);
3427 	}
3428 
3429 	if (i >= usec_timeout) {
3430 		dev_err(adev->dev, "failed to invalidate MEC data cache\n");
3431 		return -EINVAL;
3432 	}
3433 
3434 	/* Trigger an invalidation of the L1 instruction caches */
3435 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3436 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3437 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3438 
3439 	/* Wait for invalidation complete */
3440 	for (i = 0; i < usec_timeout; i++) {
3441 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3442 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3443 				       INVALIDATE_CACHE_COMPLETE))
3444 			break;
3445 		udelay(1);
3446 	}
3447 
3448 	if (i >= usec_timeout) {
3449 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3450 		return -EINVAL;
3451 	}
3452 
3453 	return 0;
3454 }
3455 
3456 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3457 {
3458 	uint32_t tmp;
3459 	struct amdgpu_device *adev = ring->adev;
3460 
3461 	/* tell RLC which is KIQ queue */
3462 	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3463 	tmp &= 0xffffff00;
3464 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3465 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3466 	tmp |= 0x80;
3467 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3468 }
3469 
3470 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3471 {
3472 	/* set graphics engine doorbell range */
3473 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3474 		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
3475 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3476 		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3477 
3478 	/* set compute engine doorbell range */
3479 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3480 		     (adev->doorbell_index.kiq * 2) << 2);
3481 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3482 		     (adev->doorbell_index.userqueue_end * 2) << 2);
3483 }
3484 
3485 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3486 				  struct amdgpu_mqd_prop *prop)
3487 {
3488 	struct v11_gfx_mqd *mqd = m;
3489 	uint64_t hqd_gpu_addr, wb_gpu_addr;
3490 	uint32_t tmp;
3491 	uint32_t rb_bufsz;
3492 
3493 	/* set up gfx hqd wptr */
3494 	mqd->cp_gfx_hqd_wptr = 0;
3495 	mqd->cp_gfx_hqd_wptr_hi = 0;
3496 
3497 	/* set the pointer to the MQD */
3498 	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3499 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3500 
3501 	/* set up mqd control */
3502 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3503 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3504 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3505 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3506 	mqd->cp_gfx_mqd_control = tmp;
3507 
3508 	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3509 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3510 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3511 	mqd->cp_gfx_hqd_vmid = 0;
3512 
3513 	/* set up default queue priority level
3514 	 * 0x0 = low priority, 0x1 = high priority */
3515 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3516 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
3517 	mqd->cp_gfx_hqd_queue_priority = tmp;
3518 
3519 	/* set up time quantum */
3520 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3521 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
3522 	mqd->cp_gfx_hqd_quantum = tmp;
3523 
3524 	/* set up gfx hqd base. This is similar to CP_RB_BASE */
3525 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3526 	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
3527 	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
3528 
3529 	/* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
3530 	wb_gpu_addr = prop->rptr_gpu_addr;
3531 	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
3532 	mqd->cp_gfx_hqd_rptr_addr_hi =
3533 		upper_32_bits(wb_gpu_addr) & 0xffff;
3534 
3535 	/* set up rb_wptr_poll addr */
3536 	wb_gpu_addr = prop->wptr_gpu_addr;
3537 	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3538 	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3539 
3540 	/* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
3541 	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
3542 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
3543 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
3544 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
3545 #ifdef __BIG_ENDIAN
3546 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3547 #endif
3548 	mqd->cp_gfx_hqd_cntl = tmp;
3549 
3550 	/* set up cp_doorbell_control */
3551 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3552 	if (prop->use_doorbell) {
3553 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3554 				    DOORBELL_OFFSET, prop->doorbell_index);
3555 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3556 				    DOORBELL_EN, 1);
3557 	} else
3558 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3559 				    DOORBELL_EN, 0);
3560 	mqd->cp_rb_doorbell_control = tmp;
3561 
3562 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3563 	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
3564 
3565 	/* activate the queue */
3566 	mqd->cp_gfx_hqd_active = 1;
3567 
3568 	return 0;
3569 }
3570 
3571 #ifdef BRING_UP_DEBUG
3572 static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
3573 {
3574 	struct amdgpu_device *adev = ring->adev;
3575 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3576 
3577 	/* set mmCP_GFX_HQD_WPTR/_HI to 0 */
3578 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
3579 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
3580 
3581 	/* set GFX_MQD_BASE */
3582 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
3583 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3584 
3585 	/* set GFX_MQD_CONTROL */
3586 	WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
3587 
3588 	/* set GFX_HQD_VMID to 0 */
3589 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
3590 
3591 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
3592 			mqd->cp_gfx_hqd_queue_priority);
3593 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
3594 
3595 	/* set GFX_HQD_BASE, similar to CP_RB_BASE */
3596 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
3597 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
3598 
3599 	/* set GFX_HQD_RPTR_ADDR, similar to CP_RB_RPTR */
3600 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
3601 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
3602 
3603 	/* set GFX_HQD_CNTL, similar to CP_RB_CNTL */
3604 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
3605 
3606 	/* set RB_WPTR_POLL_ADDR */
3607 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
3608 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
3609 
3610 	/* set RB_DOORBELL_CONTROL */
3611 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
3612 
3613 	/* activate the queue */
3614 	WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
3615 
3616 	return 0;
3617 }
3618 #endif
3619 
3620 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3621 {
3622 	struct amdgpu_device *adev = ring->adev;
3623 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3624 	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
3625 
3626 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3627 		memset((void *)mqd, 0, sizeof(*mqd));
3628 		mutex_lock(&adev->srbm_mutex);
3629 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3630 		amdgpu_ring_init_mqd(ring);
3631 #ifdef BRING_UP_DEBUG
3632 		gfx_v11_0_gfx_queue_init_register(ring);
3633 #endif
3634 		soc21_grbm_select(adev, 0, 0, 0, 0);
3635 		mutex_unlock(&adev->srbm_mutex);
3636 		if (adev->gfx.me.mqd_backup[mqd_idx])
3637 			memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3638 	} else if (amdgpu_in_reset(adev)) {
3639 		/* reset mqd with the backup copy */
3640 		if (adev->gfx.me.mqd_backup[mqd_idx])
3641 			memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
3642 		/* reset the ring */
3643 		ring->wptr = 0;
3644 		*ring->wptr_cpu_addr = 0;
3645 		amdgpu_ring_clear_ring(ring);
3646 #ifdef BRING_UP_DEBUG
3647 		mutex_lock(&adev->srbm_mutex);
3648 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3649 		gfx_v11_0_gfx_queue_init_register(ring);
3650 		soc21_grbm_select(adev, 0, 0, 0, 0);
3651 		mutex_unlock(&adev->srbm_mutex);
3652 #endif
3653 	} else {
3654 		amdgpu_ring_clear_ring(ring);
3655 	}
3656 
3657 	return 0;
3658 }
3659 
3660 #ifndef BRING_UP_DEBUG
3661 static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev)
3662 {
3663 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3664 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3665 	int r, i;
3666 
3667 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
3668 		return -EINVAL;
3669 
3670 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
3671 					adev->gfx.num_gfx_rings);
3672 	if (r) {
3673 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3674 		return r;
3675 	}
3676 
3677 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3678 		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
3679 
3680 	return amdgpu_ring_test_helper(kiq_ring);
3681 }
3682 #endif
3683 
3684 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3685 {
3686 	int r, i;
3687 	struct amdgpu_ring *ring;
3688 
3689 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3690 		ring = &adev->gfx.gfx_ring[i];
3691 
3692 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3693 		if (unlikely(r != 0))
3694 			goto done;
3695 
3696 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3697 		if (!r) {
3698 			r = gfx_v11_0_gfx_init_queue(ring);
3699 			amdgpu_bo_kunmap(ring->mqd_obj);
3700 			ring->mqd_ptr = NULL;
3701 		}
3702 		amdgpu_bo_unreserve(ring->mqd_obj);
3703 		if (r)
3704 			goto done;
3705 	}
3706 #ifndef BRING_UP_DEBUG
3707 	r = gfx_v11_0_kiq_enable_kgq(adev);
3708 	if (r)
3709 		goto done;
3710 #endif
3711 	r = gfx_v11_0_cp_gfx_start(adev);
3712 	if (r)
3713 		goto done;
3714 
3715 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3716 		ring = &adev->gfx.gfx_ring[i];
3717 		ring->sched.ready = true;
3718 	}
3719 done:
3720 	return r;
3721 }
3722 
3723 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
3724 				      struct amdgpu_mqd_prop *prop)
3725 {
3726 	struct v11_compute_mqd *mqd = m;
3727 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3728 	uint32_t tmp;
3729 
3730 	mqd->header = 0xC0310800;
3731 	mqd->compute_pipelinestat_enable = 0x00000001;
3732 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3733 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3734 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3735 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3736 	mqd->compute_misc_reserved = 0x00000007;
3737 
3738 	eop_base_addr = prop->eop_gpu_addr >> 8;
3739 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3740 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3741 
3742 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3743 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3744 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3745 			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
3746 
3747 	mqd->cp_hqd_eop_control = tmp;
3748 
3749 	/* enable doorbell? */
3750 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3751 
3752 	if (prop->use_doorbell) {
3753 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3754 				    DOORBELL_OFFSET, prop->doorbell_index);
3755 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3756 				    DOORBELL_EN, 1);
3757 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3758 				    DOORBELL_SOURCE, 0);
3759 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3760 				    DOORBELL_HIT, 0);
3761 	} else {
3762 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3763 				    DOORBELL_EN, 0);
3764 	}
3765 
3766 	mqd->cp_hqd_pq_doorbell_control = tmp;
3767 
3768 	/* disable the queue if it's active */
3769 	mqd->cp_hqd_dequeue_request = 0;
3770 	mqd->cp_hqd_pq_rptr = 0;
3771 	mqd->cp_hqd_pq_wptr_lo = 0;
3772 	mqd->cp_hqd_pq_wptr_hi = 0;
3773 
3774 	/* set the pointer to the MQD */
3775 	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3776 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3777 
3778 	/* set MQD vmid to 0 */
3779 	tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3780 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3781 	mqd->cp_mqd_control = tmp;
3782 
3783 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3784 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3785 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3786 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3787 
3788 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3789 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3790 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3791 			    (order_base_2(prop->queue_size / 4) - 1));
3792 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3793 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3794 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3795 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3796 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3797 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3798 	mqd->cp_hqd_pq_control = tmp;
3799 
3800 	/* set the wb address whether it's enabled or not */
3801 	wb_gpu_addr = prop->rptr_gpu_addr;
3802 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3803 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3804 		upper_32_bits(wb_gpu_addr) & 0xffff;
3805 
3806 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3807 	wb_gpu_addr = prop->wptr_gpu_addr;
3808 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3809 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3810 
3811 	tmp = 0;
3812 	/* enable the doorbell if requested */
3813 	if (prop->use_doorbell) {
3814 		tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3815 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3816 				DOORBELL_OFFSET, prop->doorbell_index);
3817 
3818 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3819 				    DOORBELL_EN, 1);
3820 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3821 				    DOORBELL_SOURCE, 0);
3822 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3823 				    DOORBELL_HIT, 0);
3824 	}
3825 
3826 	mqd->cp_hqd_pq_doorbell_control = tmp;
3827 
3828 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3829 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3830 
3831 	/* set the vmid for the queue */
3832 	mqd->cp_hqd_vmid = 0;
3833 
3834 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3835 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
3836 	mqd->cp_hqd_persistent_state = tmp;
3837 
3838 	/* set MIN_IB_AVAIL_SIZE */
3839 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
3840 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3841 	mqd->cp_hqd_ib_control = tmp;
3842 
3843 	/* set static priority for a compute queue/ring */
3844 	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
3845 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
3846 
3847 	mqd->cp_hqd_active = prop->hqd_active;
3848 
3849 	return 0;
3850 }
3851 
3852 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
3853 {
3854 	struct amdgpu_device *adev = ring->adev;
3855 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
3856 	int j;
3857 
3858 	/* deactivate the queue */
3859 	if (amdgpu_sriov_vf(adev))
3860 		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
3861 
3862 	/* disable wptr polling */
3863 	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3864 
3865 	/* write the EOP addr */
3866 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
3867 	       mqd->cp_hqd_eop_base_addr_lo);
3868 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
3869 	       mqd->cp_hqd_eop_base_addr_hi);
3870 
3871 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3872 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
3873 	       mqd->cp_hqd_eop_control);
3874 
3875 	/* enable doorbell? */
3876 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3877 	       mqd->cp_hqd_pq_doorbell_control);
3878 
3879 	/* disable the queue if it's active */
3880 	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
3881 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
3882 		for (j = 0; j < adev->usec_timeout; j++) {
3883 			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
3884 				break;
3885 			udelay(1);
3886 		}
3887 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
3888 		       mqd->cp_hqd_dequeue_request);
3889 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
3890 		       mqd->cp_hqd_pq_rptr);
3891 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3892 		       mqd->cp_hqd_pq_wptr_lo);
3893 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3894 		       mqd->cp_hqd_pq_wptr_hi);
3895 	}
3896 
3897 	/* set the pointer to the MQD */
3898 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
3899 	       mqd->cp_mqd_base_addr_lo);
3900 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
3901 	       mqd->cp_mqd_base_addr_hi);
3902 
3903 	/* set MQD vmid to 0 */
3904 	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
3905 	       mqd->cp_mqd_control);
3906 
3907 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3908 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
3909 	       mqd->cp_hqd_pq_base_lo);
3910 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
3911 	       mqd->cp_hqd_pq_base_hi);
3912 
3913 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3914 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
3915 	       mqd->cp_hqd_pq_control);
3916 
3917 	/* set the wb address whether it's enabled or not */
3918 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
3919 		mqd->cp_hqd_pq_rptr_report_addr_lo);
3920 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3921 		mqd->cp_hqd_pq_rptr_report_addr_hi);
3922 
3923 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3924 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
3925 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3926 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3927 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3928 
3929 	/* enable the doorbell if requested */
3930 	if (ring->use_doorbell) {
3931 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3932 			(adev->doorbell_index.kiq * 2) << 2);
3933 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3934 			(adev->doorbell_index.userqueue_end * 2) << 2);
3935 	}
3936 
3937 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3938 	       mqd->cp_hqd_pq_doorbell_control);
3939 
3940 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3941 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3942 	       mqd->cp_hqd_pq_wptr_lo);
3943 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3944 	       mqd->cp_hqd_pq_wptr_hi);
3945 
3946 	/* set the vmid for the queue */
3947 	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
3948 
3949 	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
3950 	       mqd->cp_hqd_persistent_state);
3951 
3952 	/* activate the queue */
3953 	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
3954 	       mqd->cp_hqd_active);
3955 
3956 	if (ring->use_doorbell)
3957 		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3958 
3959 	return 0;
3960 }
3961 
3962 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
3963 {
3964 	struct amdgpu_device *adev = ring->adev;
3965 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
3966 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3967 
3968 	gfx_v11_0_kiq_setting(ring);
3969 
3970 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3971 		/* reset MQD to a clean status */
3972 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3973 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
3974 
3975 		/* reset ring buffer */
3976 		ring->wptr = 0;
3977 		amdgpu_ring_clear_ring(ring);
3978 
3979 		mutex_lock(&adev->srbm_mutex);
3980 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3981 		gfx_v11_0_kiq_init_register(ring);
3982 		soc21_grbm_select(adev, 0, 0, 0, 0);
3983 		mutex_unlock(&adev->srbm_mutex);
3984 	} else {
3985 		memset((void *)mqd, 0, sizeof(*mqd));
3986 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3987 			amdgpu_ring_clear_ring(ring);
3988 		mutex_lock(&adev->srbm_mutex);
3989 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3990 		amdgpu_ring_init_mqd(ring);
3991 		gfx_v11_0_kiq_init_register(ring);
3992 		soc21_grbm_select(adev, 0, 0, 0, 0);
3993 		mutex_unlock(&adev->srbm_mutex);
3994 
3995 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3996 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3997 	}
3998 
3999 	return 0;
4000 }
4001 
4002 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4003 {
4004 	struct amdgpu_device *adev = ring->adev;
4005 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4006 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4007 
4008 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4009 		memset((void *)mqd, 0, sizeof(*mqd));
4010 		mutex_lock(&adev->srbm_mutex);
4011 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4012 		amdgpu_ring_init_mqd(ring);
4013 		soc21_grbm_select(adev, 0, 0, 0, 0);
4014 		mutex_unlock(&adev->srbm_mutex);
4015 
4016 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4017 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4018 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4019 		/* reset MQD to a clean status */
4020 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4021 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4022 
4023 		/* reset ring buffer */
4024 		ring->wptr = 0;
4025 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4026 		amdgpu_ring_clear_ring(ring);
4027 	} else {
4028 		amdgpu_ring_clear_ring(ring);
4029 	}
4030 
4031 	return 0;
4032 }
4033 
4034 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4035 {
4036 	struct amdgpu_ring *ring;
4037 	int r;
4038 
4039 	ring = &adev->gfx.kiq.ring;
4040 
4041 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4042 	if (unlikely(r != 0))
4043 		return r;
4044 
4045 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4046 	if (unlikely(r != 0)) {
4047 		amdgpu_bo_unreserve(ring->mqd_obj);
4048 		return r;
4049 	}
4050 
4051 	gfx_v11_0_kiq_init_queue(ring);
4052 	amdgpu_bo_kunmap(ring->mqd_obj);
4053 	ring->mqd_ptr = NULL;
4054 	amdgpu_bo_unreserve(ring->mqd_obj);
4055 	ring->sched.ready = true;
4056 	return 0;
4057 }
4058 
4059 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4060 {
4061 	struct amdgpu_ring *ring = NULL;
4062 	int r = 0, i;
4063 
4064 	if (!amdgpu_async_gfx_ring)
4065 		gfx_v11_0_cp_compute_enable(adev, true);
4066 
4067 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4068 		ring = &adev->gfx.compute_ring[i];
4069 
4070 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4071 		if (unlikely(r != 0))
4072 			goto done;
4073 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4074 		if (!r) {
4075 			r = gfx_v11_0_kcq_init_queue(ring);
4076 			amdgpu_bo_kunmap(ring->mqd_obj);
4077 			ring->mqd_ptr = NULL;
4078 		}
4079 		amdgpu_bo_unreserve(ring->mqd_obj);
4080 		if (r)
4081 			goto done;
4082 	}
4083 
4084 	r = amdgpu_gfx_enable_kcq(adev);
4085 done:
4086 	return r;
4087 }
4088 
4089 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4090 {
4091 	int r, i;
4092 	struct amdgpu_ring *ring;
4093 
4094 	if (!(adev->flags & AMD_IS_APU))
4095 		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4096 
4097 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4098 		/* legacy firmware loading */
4099 		r = gfx_v11_0_cp_gfx_load_microcode(adev);
4100 		if (r)
4101 			return r;
4102 
4103 		if (adev->gfx.rs64_enable)
4104 			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4105 		else
4106 			r = gfx_v11_0_cp_compute_load_microcode(adev);
4107 		if (r)
4108 			return r;
4109 	}
4110 
4111 	gfx_v11_0_cp_set_doorbell_range(adev);
4112 
4113 	if (amdgpu_async_gfx_ring) {
4114 		gfx_v11_0_cp_compute_enable(adev, true);
4115 		gfx_v11_0_cp_gfx_enable(adev, true);
4116 	}
4117 
4118 	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4119 		r = amdgpu_mes_kiq_hw_init(adev);
4120 	else
4121 		r = gfx_v11_0_kiq_resume(adev);
4122 	if (r)
4123 		return r;
4124 
4125 	r = gfx_v11_0_kcq_resume(adev);
4126 	if (r)
4127 		return r;
4128 
4129 	if (!amdgpu_async_gfx_ring) {
4130 		r = gfx_v11_0_cp_gfx_resume(adev);
4131 		if (r)
4132 			return r;
4133 	} else {
4134 		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4135 		if (r)
4136 			return r;
4137 	}
4138 
4139 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4140 		ring = &adev->gfx.gfx_ring[i];
4141 		r = amdgpu_ring_test_helper(ring);
4142 		if (r)
4143 			return r;
4144 	}
4145 
4146 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4147 		ring = &adev->gfx.compute_ring[i];
4148 		r = amdgpu_ring_test_helper(ring);
4149 		if (r)
4150 			return r;
4151 	}
4152 
4153 	return 0;
4154 }
4155 
4156 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4157 {
4158 	gfx_v11_0_cp_gfx_enable(adev, enable);
4159 	gfx_v11_0_cp_compute_enable(adev, enable);
4160 }
4161 
4162 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4163 {
4164 	int r;
4165 	bool value;
4166 
4167 	r = adev->gfxhub.funcs->gart_enable(adev);
4168 	if (r)
4169 		return r;
4170 
4171 	adev->hdp.funcs->flush_hdp(adev, NULL);
4172 
4173 	value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
4174 		false : true;
4175 
4176 	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4177 	amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
4178 
4179 	return 0;
4180 }
4181 
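/* Select the RS64 CP engine and MEC ISA mode when RS64 firmware is in use. */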
4182 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4183 {
4184 	u32 tmp;
4185 
4186 	/* select RS64 */
4187 	if (adev->gfx.rs64_enable) {
4188 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4189 		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4190 		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4191 
4192 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4193 		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4194 		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4195 	}
4196 
4197 	if (amdgpu_emu_mode == 1)
4198 		msleep(100);
4199 }
4200 
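/* Read GB_ADDR_CONFIG and cache the decoded pipe/packer/RB/SE topology fields
 * for later use by the driver.
 */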
4201 static int get_gb_addr_config(struct amdgpu_device *adev)
4202 {
4203 	u32 gb_addr_config;
4204 
4205 	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4206 	if (gb_addr_config == 0)
4207 		return -EINVAL;
4208 
4209 	adev->gfx.config.gb_addr_config_fields.num_pkrs =
4210 		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4211 
4212 	adev->gfx.config.gb_addr_config = gb_addr_config;
4213 
4214 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4215 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4216 				      GB_ADDR_CONFIG, NUM_PIPES);
4217 
4218 	adev->gfx.config.max_tile_pipes =
4219 		adev->gfx.config.gb_addr_config_fields.num_pipes;
4220 
4221 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4222 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4223 				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4224 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4225 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4226 				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
4227 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4228 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4229 				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4230 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4231 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4232 				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4233 
4234 	return 0;
4235 }
4236 
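/* Set the GPA override bits in the CPC/CPG PSP debug registers to disable GPA
 * mode; called when firmware is loaded directly rather than through PSP.
 */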
4237 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4238 {
4239 	uint32_t data;
4240 
4241 	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4242 	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4243 	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4244 
4245 	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4246 	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4247 	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4248 }
4249 
4250 static int gfx_v11_0_hw_init(void *handle)
4251 {
4252 	int r;
4253 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4254 
4255 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4256 		if (adev->gfx.imu.funcs) {
4257 			/* RLC autoload sequence 1: Program rlc ram */
4258 			if (adev->gfx.imu.funcs->program_rlc_ram)
4259 				adev->gfx.imu.funcs->program_rlc_ram(adev);
4260 		}
4261 		/* rlc autoload firmware */
4262 		r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4263 		if (r)
4264 			return r;
4265 	} else {
4266 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4267 			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4268 				if (adev->gfx.imu.funcs->load_microcode)
4269 					adev->gfx.imu.funcs->load_microcode(adev);
4270 				if (adev->gfx.imu.funcs->setup_imu)
4271 					adev->gfx.imu.funcs->setup_imu(adev);
4272 				if (adev->gfx.imu.funcs->start_imu)
4273 					adev->gfx.imu.funcs->start_imu(adev);
4274 			}
4275 
4276 			/* disable gpa mode in backdoor loading */
4277 			gfx_v11_0_disable_gpa_mode(adev);
4278 		}
4279 	}
4280 
4281 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4282 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4283 		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4284 		if (r) {
4285 			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4286 			return r;
4287 		}
4288 	}
4289 
4290 	adev->gfx.is_poweron = true;
4291 
4292 	if (get_gb_addr_config(adev))
4293 		DRM_WARN("Invalid gb_addr_config!\n");
4294 
4295 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4296 	    adev->gfx.rs64_enable)
4297 		gfx_v11_0_config_gfx_rs64(adev);
4298 
4299 	r = gfx_v11_0_gfxhub_enable(adev);
4300 	if (r)
4301 		return r;
4302 
4303 	if (!amdgpu_emu_mode)
4304 		gfx_v11_0_init_golden_registers(adev);
4305 
4306 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4307 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4308 		/*
4309 		 * For gfx 11, RLC firmware loading relies on the SMU firmware
4310 		 * being loaded first, so in direct loading mode the SMC ucode
4311 		 * has to be loaded here before the RLC.
4312 		 */
4313 		if (!(adev->flags & AMD_IS_APU)) {
4314 			r = amdgpu_pm_load_smu_firmware(adev, NULL);
4315 			if (r)
4316 				return r;
4317 		}
4318 	}
4319 
4320 	gfx_v11_0_constants_init(adev);
4321 
4322 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4323 		gfx_v11_0_select_cp_fw_arch(adev);
4324 
4325 	if (adev->nbio.funcs->gc_doorbell_init)
4326 		adev->nbio.funcs->gc_doorbell_init(adev);
4327 
4328 	r = gfx_v11_0_rlc_resume(adev);
4329 	if (r)
4330 		return r;
4331 
4332 	/*
4333 	 * golden register init and rlc resume may override some registers,
4334 	 * so reconfigure them here
4335 	 */
4336 	gfx_v11_0_tcp_harvest(adev);
4337 
4338 	r = gfx_v11_0_cp_resume(adev);
4339 	if (r)
4340 		return r;
4341 
4342 	return r;
4343 }
4344 
4345 #ifndef BRING_UP_DEBUG
4346 static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
4347 {
4348 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4349 	struct amdgpu_ring *kiq_ring = &kiq->ring;
4350 	int i, r = 0;
4351 
4352 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4353 		return -EINVAL;
4354 
4355 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
4356 					adev->gfx.num_gfx_rings))
4357 		return -ENOMEM;
4358 
4359 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4360 		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
4361 					   PREEMPT_QUEUES, 0, 0);
4362 
4363 	if (adev->gfx.kiq.ring.sched.ready)
4364 		r = amdgpu_ring_test_helper(kiq_ring);
4365 
4366 	return r;
4367 }
4368 #endif
4369 
4370 static int gfx_v11_0_hw_fini(void *handle)
4371 {
4372 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4373 	int r;
4374 
4375 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4376 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4377 
4378 	if (!adev->no_hw_access) {
4379 #ifndef BRING_UP_DEBUG
4380 		if (amdgpu_async_gfx_ring) {
4381 			r = gfx_v11_0_kiq_disable_kgq(adev);
4382 			if (r)
4383 				DRM_ERROR("KGQ disable failed\n");
4384 		}
4385 #endif
4386 		if (amdgpu_gfx_disable_kcq(adev))
4387 			DRM_ERROR("KCQ disable failed\n");
4388 
4389 		amdgpu_mes_kiq_hw_fini(adev);
4390 	}
4391 
4392 	if (amdgpu_sriov_vf(adev))
4393 		/* The steps that disable CPG and clear the KIQ position are
4394 		 * skipped here so that the CP can perform IDLE-SAVE during the
4395 		 * switch. Those steps were only needed to avoid a DMAR error on
4396 		 * gfx9, which is not reproduced on gfx11.
4397 		 */
4398 		return 0;
4399 
4400 	gfx_v11_0_cp_enable(adev, false);
4401 	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4402 
4403 	adev->gfxhub.funcs->gart_disable(adev);
4404 
4405 	adev->gfx.is_poweron = false;
4406 
4407 	return 0;
4408 }
4409 
4410 static int gfx_v11_0_suspend(void *handle)
4411 {
4412 	return gfx_v11_0_hw_fini(handle);
4413 }
4414 
4415 static int gfx_v11_0_resume(void *handle)
4416 {
4417 	return gfx_v11_0_hw_init(handle);
4418 }
4419 
4420 static bool gfx_v11_0_is_idle(void *handle)
4421 {
4422 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4423 
4424 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4425 				GRBM_STATUS, GUI_ACTIVE))
4426 		return false;
4427 	else
4428 		return true;
4429 }
4430 
4431 static int gfx_v11_0_wait_for_idle(void *handle)
4432 {
4433 	unsigned i;
4434 	u32 tmp;
4435 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4436 
4437 	for (i = 0; i < adev->usec_timeout; i++) {
4438 		/* read GRBM_STATUS */
4439 		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4440 			GRBM_STATUS__GUI_ACTIVE_MASK;
4441 
4442 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4443 			return 0;
4444 		udelay(1);
4445 	}
4446 	return -ETIMEDOUT;
4447 }
4448 
4449 static int gfx_v11_0_soft_reset(void *handle)
4450 {
4451 	u32 grbm_soft_reset = 0;
4452 	u32 tmp;
4453 	int i, j, k;
4454 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4455 
4456 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4457 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4458 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4459 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4460 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4461 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4462 
4463 	gfx_v11_0_set_safe_mode(adev);
4464 
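	/* Select every compute HQD in turn, ask it to dequeue and reset its SPI
	 * queue state.
	 */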
4465 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4466 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4467 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4468 				tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4469 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4470 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4471 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4472 				WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4473 
4474 				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4475 				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4476 			}
4477 		}
4478 	}
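	/* Likewise ask every gfx HQD to dequeue. */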
4479 	for (i = 0; i < adev->gfx.me.num_me; ++i) {
4480 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4481 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4482 				tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4483 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4484 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4485 				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4486 				WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4487 
4488 				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4489 			}
4490 		}
4491 	}
4492 
4493 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4494 
4495 	/* Read the CP_VMID_RESET register three times to give GFX_HQD_ACTIVE
4496 	 * sufficient time to reach 0. */
4497 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4498 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4499 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4500 
4501 	for (i = 0; i < adev->usec_timeout; i++) {
4502 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4503 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4504 			break;
4505 		udelay(1);
4506 	}
4507 	if (i >= adev->usec_timeout) {
4508 		dev_err(adev->dev, "failed to wait for all pipes clean\n");
4509 		return -EINVAL;
4510 	}
4511 
4512 	/**********  trigger soft reset  ***********/
4513 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4514 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4515 					SOFT_RESET_CP, 1);
4516 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4517 					SOFT_RESET_GFX, 1);
4518 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4519 					SOFT_RESET_CPF, 1);
4520 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4521 					SOFT_RESET_CPC, 1);
4522 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4523 					SOFT_RESET_CPG, 1);
4524 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4525 	/**********  exit soft reset  ***********/
4526 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4527 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4528 					SOFT_RESET_CP, 0);
4529 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4530 					SOFT_RESET_GFX, 0);
4531 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4532 					SOFT_RESET_CPF, 0);
4533 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4534 					SOFT_RESET_CPC, 0);
4535 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4536 					SOFT_RESET_CPG, 0);
4537 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4538 
4539 	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4540 	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4541 	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4542 
4543 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4544 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4545 
4546 	for (i = 0; i < adev->usec_timeout; i++) {
4547 		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4548 			break;
4549 		udelay(1);
4550 	}
4551 	if (i >= adev->usec_timeout) {
4552 		dev_err(adev->dev, "failed to wait for CP_VMID_RESET to reach 0\n");
4553 		return -EINVAL;
4554 	}
4555 
4556 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4557 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4558 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4559 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4560 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4561 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4562 
4563 	gfx_v11_0_unset_safe_mode(adev);
4564 
4565 	return gfx_v11_0_cp_resume(adev);
4566 }
4567 
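/* Report that a soft reset is needed if any gfx or compute ring fails an IB
 * test within a one second timeout.
 */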
4568 static bool gfx_v11_0_check_soft_reset(void *handle)
4569 {
4570 	int i, r;
4571 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4572 	struct amdgpu_ring *ring;
4573 	long tmo = msecs_to_jiffies(1000);
4574 
4575 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4576 		ring = &adev->gfx.gfx_ring[i];
4577 		r = amdgpu_ring_test_ib(ring, tmo);
4578 		if (r)
4579 			return true;
4580 	}
4581 
4582 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4583 		ring = &adev->gfx.compute_ring[i];
4584 		r = amdgpu_ring_test_ib(ring, tmo);
4585 		if (r)
4586 			return true;
4587 	}
4588 
4589 	return false;
4590 }
4591 
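/* Sample the 64-bit GOLDEN_TSC counter, with GFXOFF held off for the duration
 * of the read.
 */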
4592 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4593 {
4594 	uint64_t clock;
4595 
4596 	amdgpu_gfx_off_ctrl(adev, false);
4597 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4598 	clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) |
4599 		((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL);
4600 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4601 	amdgpu_gfx_off_ctrl(adev, true);
4602 	return clock;
4603 }
4604 
4605 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4606 					   uint32_t vmid,
4607 					   uint32_t gds_base, uint32_t gds_size,
4608 					   uint32_t gws_base, uint32_t gws_size,
4609 					   uint32_t oa_base, uint32_t oa_size)
4610 {
4611 	struct amdgpu_device *adev = ring->adev;
4612 
4613 	/* GDS Base */
4614 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4615 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
4616 				    gds_base);
4617 
4618 	/* GDS Size */
4619 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4620 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
4621 				    gds_size);
4622 
4623 	/* GWS */
4624 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4625 				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
4626 				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4627 
4628 	/* OA */
4629 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4630 				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
4631 				    (1 << (oa_size + oa_base)) - (1 << oa_base));
4632 }
4633 
4634 static int gfx_v11_0_early_init(void *handle)
4635 {
4636 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4637 
4638 	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
4639 
4640 	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
4641 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4642 					  AMDGPU_MAX_COMPUTE_RINGS);
4643 
4644 	gfx_v11_0_set_kiq_pm4_funcs(adev);
4645 	gfx_v11_0_set_ring_funcs(adev);
4646 	gfx_v11_0_set_irq_funcs(adev);
4647 	gfx_v11_0_set_gds_init(adev);
4648 	gfx_v11_0_set_rlc_funcs(adev);
4649 	gfx_v11_0_set_mqd_funcs(adev);
4650 	gfx_v11_0_set_imu_funcs(adev);
4651 
4652 	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
4653 
4654 	return gfx_v11_0_init_microcode(adev);
4655 }
4656 
4657 static int gfx_v11_0_ras_late_init(void *handle)
4658 {
4659 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4660 	struct ras_common_if *gfx_common_if;
4661 	int ret;
4662 
4663 	gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL);
4664 	if (!gfx_common_if)
4665 		return -ENOMEM;
4666 
4667 	gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX;
4668 
4669 	ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true);
4670 	if (ret)
4671 		dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n");
4672 
4673 	kfree(gfx_common_if);
4674 	return 0;
4675 }
4676 
4677 static int gfx_v11_0_late_init(void *handle)
4678 {
4679 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4680 	int r;
4681 
4682 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4683 	if (r)
4684 		return r;
4685 
4686 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4687 	if (r)
4688 		return r;
4689 
4690 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
4691 		r = gfx_v11_0_ras_late_init(handle);
4692 		if (r)
4693 			return r;
4694 	}
4695 
4696 	return 0;
4697 }
4698 
4699 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
4700 {
4701 	uint32_t rlc_cntl;
4702 
4703 	/* if RLC is not enabled, do nothing */
4704 	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
4705 	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
4706 }
4707 
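/* Request RLC safe mode and poll until the RLC acknowledges the command. */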
4708 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
4709 {
4710 	uint32_t data;
4711 	unsigned i;
4712 
4713 	data = RLC_SAFE_MODE__CMD_MASK;
4714 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4715 
4716 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
4717 
4718 	/* wait for RLC_SAFE_MODE */
4719 	for (i = 0; i < adev->usec_timeout; i++) {
4720 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
4721 				   RLC_SAFE_MODE, CMD))
4722 			break;
4723 		udelay(1);
4724 	}
4725 }
4726 
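/* Send the safe-mode exit command; no acknowledgement poll on this path. */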
4727 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev)
4728 {
4729 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
4730 }
4731 
4732 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
4733 				      bool enable)
4734 {
4735 	uint32_t def, data;
4736 
4737 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
4738 		return;
4739 
4740 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4741 
4742 	if (enable)
4743 		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4744 	else
4745 		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4746 
4747 	if (def != data)
4748 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4749 }
4750 
4751 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
4752 				       bool enable)
4753 {
4754 	uint32_t def, data;
4755 
4756 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4757 		return;
4758 
4759 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4760 
4761 	if (enable)
4762 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4763 	else
4764 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4765 
4766 	if (def != data)
4767 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4768 }
4769 
4770 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
4771 					   bool enable)
4772 {
4773 	uint32_t def, data;
4774 
4775 	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4776 		return;
4777 
4778 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4779 
4780 	if (enable)
4781 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4782 	else
4783 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4784 
4785 	if (def != data)
4786 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4787 }
4788 
4789 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4790 						       bool enable)
4791 {
4792 	uint32_t data, def;
4793 
4794 	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
4795 		return;
4796 
	/* MGCG is disabled by HW by default */
4798 	if (enable) {
4799 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4800 			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4801 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4802 
4803 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4804 				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4805 				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4806 
4807 			if (def != data)
4808 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4809 		}
4810 	} else {
4811 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4812 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4813 
4814 			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4815 				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4816 				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4817 
4818 			if (def != data)
4819 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4820 		}
4821 	}
4822 }
4823 
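/*
 * Coarse grain clockgating (CGCG/CGLS and their 3D variants): when
 * enabling, drop the relevant MGCG_OVERRIDE bits, program the CGCG/CGLS
 * FSM thresholds and delays, set the CP idle poll count and turn on the
 * CP and SDMA CGCG interrupts; when disabling, clear the CGCG/CGLS
 * enable bits and the SDMA CGCG interrupts again.
 */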
4824 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4825 						       bool enable)
4826 {
4827 	uint32_t def, data;
4828 
4829 	if (!(adev->cg_flags &
4830 	      (AMD_CG_SUPPORT_GFX_CGCG |
4831 	      AMD_CG_SUPPORT_GFX_CGLS |
4832 	      AMD_CG_SUPPORT_GFX_3D_CGCG |
4833 	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
4834 		return;
4835 
4836 	if (enable) {
4837 		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4838 
4839 		/* unset CGCG override */
4840 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4841 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4842 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4843 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4844 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
4845 		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4846 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4847 
4848 		/* update CGCG override bits */
4849 		if (def != data)
4850 			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4851 
4852 		/* enable cgcg FSM(0x0000363F) */
4853 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4854 
4855 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
4856 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
4857 			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4858 				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4859 		}
4860 
4861 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
4862 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
4863 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4864 				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4865 		}
4866 
4867 		if (def != data)
4868 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4869 
4870 		/* Program RLC_CGCG_CGLS_CTRL_3D */
4871 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4872 
4873 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
4874 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
4875 			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4876 				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4877 		}
4878 
4879 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
4880 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
4881 			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4882 				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4883 		}
4884 
4885 		if (def != data)
4886 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4887 
4888 		/* set IDLE_POLL_COUNT(0x00900100) */
4889 		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
4890 
4891 		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
4892 		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4893 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4894 
4895 		if (def != data)
4896 			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
4897 
4898 		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4899 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4900 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4901 		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4902 		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4903 		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
4904 
4905 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4906 		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4907 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4908 
		/* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
4910 		if (adev->sdma.num_instances > 1) {
4911 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4912 			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4913 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4914 		}
4915 	} else {
4916 		/* Program RLC_CGCG_CGLS_CTRL */
4917 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4918 
4919 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4920 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4921 
4922 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4923 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4924 
4925 		if (def != data)
4926 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4927 
4928 		/* Program RLC_CGCG_CGLS_CTRL_3D */
4929 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4930 
4931 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4932 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4933 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4934 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4935 
4936 		if (def != data)
4937 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4938 
4939 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4940 		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4941 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4942 
		/* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
4944 		if (adev->sdma.num_instances > 1) {
4945 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4946 			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4947 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4948 		}
4949 	}
4950 }
4951 
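/*
 * Apply or remove all supported GFX clockgating features under RLC safe
 * mode, and keep the GUI idle interrupt in sync when any CGCG/MGCG
 * feature is supported.
 */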
4952 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4953 					    bool enable)
4954 {
4955 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4956 
4957 	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
4958 
4959 	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
4960 
4961 	gfx_v11_0_update_repeater_fgcg(adev, enable);
4962 
4963 	gfx_v11_0_update_sram_fgcg(adev, enable);
4964 
4965 	gfx_v11_0_update_perf_clk(adev, enable);
4966 
4967 	if (adev->cg_flags &
4968 	    (AMD_CG_SUPPORT_GFX_MGCG |
4969 	     AMD_CG_SUPPORT_GFX_CGLS |
4970 	     AMD_CG_SUPPORT_GFX_CGCG |
4971 	     AMD_CG_SUPPORT_GFX_3D_CGCG |
4972 	     AMD_CG_SUPPORT_GFX_3D_CGLS))
		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
4974 
4975 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4976 
4977 	return 0;
4978 }
4979 
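/*
 * Program the SPM VMID in RLC_SPM_MC_CNTL. GFXOFF is disabled around the
 * register access, and the no-KIQ accessors are used in SRIOV one-VF mode.
 */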
4980 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4981 {
4982 	u32 reg, data;
4983 
4984 	amdgpu_gfx_off_ctrl(adev, false);
4985 
4986 	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
4987 	if (amdgpu_sriov_is_pp_one_vf(adev))
4988 		data = RREG32_NO_KIQ(reg);
4989 	else
4990 		data = RREG32(reg);
4991 
4992 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4993 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4994 
4995 	if (amdgpu_sriov_is_pp_one_vf(adev))
4996 		WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
4997 	else
4998 		WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
4999 
5000 	amdgpu_gfx_off_ctrl(adev, true);
5001 }
5002 
5003 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5004 	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5005 	.set_safe_mode = gfx_v11_0_set_safe_mode,
5006 	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
5007 	.init = gfx_v11_0_rlc_init,
5008 	.get_csb_size = gfx_v11_0_get_csb_size,
5009 	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
5010 	.resume = gfx_v11_0_rlc_resume,
5011 	.stop = gfx_v11_0_rlc_stop,
5012 	.reset = gfx_v11_0_rlc_reset,
5013 	.start = gfx_v11_0_rlc_start,
5014 	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
5015 };
5016 
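/*
 * Toggle GFX_POWER_GATING_ENABLE in RLC_PG_CNTL; when enabling, also
 * program the CGPG hysteresis default into RLC_PG_DELAY_3 on GC 11.0.1
 * and 11.0.4.
 */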
5017 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5018 {
5019 	u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5020 
5021 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5022 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5023 	else
5024 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5025 
5026 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5027 
	/* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5029 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5030 		switch (adev->ip_versions[GC_HWIP][0]) {
5031 		case IP_VERSION(11, 0, 1):
5032 		case IP_VERSION(11, 0, 4):
5033 			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5034 			break;
5035 		default:
5036 			break;
5037 		}
5038 	}
5039 }
5040 
5041 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5042 {
5043 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5044 
5045 	gfx_v11_cntl_power_gating(adev, enable);
5046 
5047 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5048 }
5049 
5050 static int gfx_v11_0_set_powergating_state(void *handle,
5051 					   enum amd_powergating_state state)
5052 {
5053 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5054 	bool enable = (state == AMD_PG_STATE_GATE);
5055 
5056 	if (amdgpu_sriov_vf(adev))
5057 		return 0;
5058 
5059 	switch (adev->ip_versions[GC_HWIP][0]) {
5060 	case IP_VERSION(11, 0, 0):
5061 	case IP_VERSION(11, 0, 2):
5062 	case IP_VERSION(11, 0, 3):
5063 		amdgpu_gfx_off_ctrl(adev, enable);
5064 		break;
5065 	case IP_VERSION(11, 0, 1):
5066 	case IP_VERSION(11, 0, 4):
5067 		gfx_v11_cntl_pg(adev, enable);
5068 		amdgpu_gfx_off_ctrl(adev, enable);
5069 		break;
5070 	default:
5071 		break;
5072 	}
5073 
5074 	return 0;
5075 }
5076 
5077 static int gfx_v11_0_set_clockgating_state(void *handle,
5078 					  enum amd_clockgating_state state)
5079 {
5080 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5081 
5082 	if (amdgpu_sriov_vf(adev))
		return 0;
5084 
5085 	switch (adev->ip_versions[GC_HWIP][0]) {
5086 	case IP_VERSION(11, 0, 0):
5087 	case IP_VERSION(11, 0, 1):
5088 	case IP_VERSION(11, 0, 2):
5089 	case IP_VERSION(11, 0, 3):
5090 	case IP_VERSION(11, 0, 4):
		gfx_v11_0_update_gfx_clock_gating(adev,
				state == AMD_CG_STATE_GATE);
		break;
	default:
		break;
5096 	}
5097 
5098 	return 0;
5099 }
5100 
5101 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5102 {
5103 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5104 	int data;
5105 
5106 	/* AMD_CG_SUPPORT_GFX_MGCG */
5107 	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5108 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5109 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5110 
5111 	/* AMD_CG_SUPPORT_REPEATER_FGCG */
5112 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5113 		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5114 
5115 	/* AMD_CG_SUPPORT_GFX_FGCG */
5116 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5117 		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
5118 
5119 	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
5120 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5121 		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5122 
5123 	/* AMD_CG_SUPPORT_GFX_CGCG */
5124 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5125 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5126 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5127 
5128 	/* AMD_CG_SUPPORT_GFX_CGLS */
5129 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5130 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5131 
5132 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5133 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5134 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5135 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5136 
5137 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5138 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5139 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5140 }
5141 
5142 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5143 {
	/* gfx11 hardware is 32bit rptr */
5145 	return *(uint32_t *)ring->rptr_cpu_addr;
5146 }
5147 
5148 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5149 {
5150 	struct amdgpu_device *adev = ring->adev;
5151 	u64 wptr;
5152 
5153 	/* XXX check if swapping is necessary on BE */
5154 	if (ring->use_doorbell) {
5155 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5156 	} else {
5157 		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5158 		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5159 	}
5160 
5161 	return wptr;
5162 }
5163 
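/*
 * Commit the gfx ring write pointer. For MES queues the wptr is also
 * mirrored into the shadow slot right after the MQD and the aggregated
 * doorbell is rung when MES has unmapped the queue; otherwise the ring
 * doorbell or the CP_RB0_WPTR registers are used.
 */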
5164 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5165 {
5166 	struct amdgpu_device *adev = ring->adev;
5167 	uint32_t *wptr_saved;
5168 	uint32_t *is_queue_unmap;
5169 	uint64_t aggregated_db_index;
5170 	uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
5171 	uint64_t wptr_tmp;
5172 
5173 	if (ring->is_mes_queue) {
5174 		wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5175 		is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5176 					      sizeof(uint32_t));
5177 		aggregated_db_index =
5178 			amdgpu_mes_get_aggregated_doorbell_index(adev,
5179 								 ring->hw_prio);
5180 
5181 		wptr_tmp = ring->wptr & ring->buf_mask;
5182 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5183 		*wptr_saved = wptr_tmp;
		/* assume the doorbell is always used by an mes mapped queue */
		if (*is_queue_unmap)
			WDOORBELL64(aggregated_db_index, wptr_tmp);
		WDOORBELL64(ring->doorbell_index, wptr_tmp);
5194 	} else {
5195 		if (ring->use_doorbell) {
5196 			/* XXX check if swapping is necessary on BE */
5197 			atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5198 				     ring->wptr);
5199 			WDOORBELL64(ring->doorbell_index, ring->wptr);
5200 		} else {
5201 			WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5202 				     lower_32_bits(ring->wptr));
5203 			WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5204 				     upper_32_bits(ring->wptr));
5205 		}
5206 	}
5207 }
5208 
5209 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5210 {
5211 	/* gfx11 hardware is 32bit rptr */
5212 	return *(uint32_t *)ring->rptr_cpu_addr;
5213 }
5214 
5215 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5216 {
5217 	u64 wptr;
5218 
5219 	/* XXX check if swapping is necessary on BE */
5220 	if (ring->use_doorbell)
5221 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5222 	else
5223 		BUG();
5224 	return wptr;
5225 }
5226 
5227 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5228 {
5229 	struct amdgpu_device *adev = ring->adev;
5230 	uint32_t *wptr_saved;
5231 	uint32_t *is_queue_unmap;
5232 	uint64_t aggregated_db_index;
5233 	uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
5234 	uint64_t wptr_tmp;
5235 
5236 	if (ring->is_mes_queue) {
5237 		wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5238 		is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5239 					      sizeof(uint32_t));
5240 		aggregated_db_index =
5241 			amdgpu_mes_get_aggregated_doorbell_index(adev,
5242 								 ring->hw_prio);
5243 
5244 		wptr_tmp = ring->wptr & ring->buf_mask;
5245 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5246 		*wptr_saved = wptr_tmp;
		/* assume the doorbell is always used by an mes mapped queue */
		if (*is_queue_unmap)
			WDOORBELL64(aggregated_db_index, wptr_tmp);
		WDOORBELL64(ring->doorbell_index, wptr_tmp);
5257 	} else {
5258 		/* XXX check if swapping is necessary on BE */
5259 		if (ring->use_doorbell) {
5260 			atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5261 				     ring->wptr);
5262 			WDOORBELL64(ring->doorbell_index, ring->wptr);
5263 		} else {
5264 			BUG(); /* only DOORBELL method supported on gfx11 now */
5265 		}
5266 	}
5267 }
5268 
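/*
 * Emit an HDP flush: pick the per-engine ref/mask bit from the nbio
 * hdp_flush_reg table (cp0 for gfx, cp2/cp6 shifted by pipe for MEC1/MEC2)
 * and emit a WAIT_REG_MEM against the nbio HDP flush request and done
 * registers.
 */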
5269 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5270 {
5271 	struct amdgpu_device *adev = ring->adev;
5272 	u32 ref_and_mask, reg_mem_engine;
5273 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5274 
5275 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5276 		switch (ring->me) {
5277 		case 1:
5278 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5279 			break;
5280 		case 2:
5281 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5282 			break;
5283 		default:
5284 			return;
5285 		}
5286 		reg_mem_engine = 0;
5287 	} else {
5288 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5289 		reg_mem_engine = 1; /* pfp */
5290 	}
5291 
5292 	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5293 			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5294 			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5295 			       ref_and_mask, ref_and_mask, 0x20);
5296 }
5297 
5298 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5299 				       struct amdgpu_job *job,
5300 				       struct amdgpu_ib *ib,
5301 				       uint32_t flags)
5302 {
5303 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5304 	u32 header, control = 0;
5305 
5306 	BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5307 
5308 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5309 
5310 	control |= ib->length_dw | (vmid << 24);
5311 
5312 	if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5313 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5314 
5315 		if (flags & AMDGPU_IB_PREEMPTED)
5316 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5317 
5318 		if (vmid)
5319 			gfx_v11_0_ring_emit_de_meta(ring,
5320 				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
5321 	}
5322 
5323 	if (ring->is_mes_queue)
5324 		/* inherit vmid from mqd */
5325 		control |= 0x400000;
5326 
5327 	amdgpu_ring_write(ring, header);
5328 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5329 	amdgpu_ring_write(ring,
5330 #ifdef __BIG_ENDIAN
5331 		(2 << 0) |
5332 #endif
5333 		lower_32_bits(ib->gpu_addr));
5334 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5335 	amdgpu_ring_write(ring, control);
5336 }
5337 
5338 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5339 					   struct amdgpu_job *job,
5340 					   struct amdgpu_ib *ib,
5341 					   uint32_t flags)
5342 {
5343 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5344 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5345 
5346 	if (ring->is_mes_queue)
5347 		/* inherit vmid from mqd */
5348 		control |= 0x40000000;
5349 
5350 	/* Currently, there is a high possibility to get wave ID mismatch
5351 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5352 	 * different wave IDs than the GDS expects. This situation happens
5353 	 * randomly when at least 5 compute pipes use GDS ordered append.
5354 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5355 	 * Those are probably bugs somewhere else in the kernel driver.
5356 	 *
5357 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5358 	 * GDS to 0 for this ring (me/pipe).
5359 	 */
5360 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5361 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5362 		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5363 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5364 	}
5365 
5366 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5367 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5368 	amdgpu_ring_write(ring,
5369 #ifdef __BIG_ENDIAN
5370 				(2 << 0) |
5371 #endif
5372 				lower_32_bits(ib->gpu_addr));
5373 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5374 	amdgpu_ring_write(ring, control);
5375 }
5376 
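/*
 * Emit a fence: RELEASE_MEM with a full GCR flush/invalidate at EOP,
 * writing a 32-bit or 64-bit seq to addr and optionally raising an
 * interrupt. For MES queues the final DW carries the hw queue id tagged
 * with AMDGPU_FENCE_MES_QUEUE_FLAG.
 */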
5377 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5378 				     u64 seq, unsigned flags)
5379 {
5380 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5381 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5382 
5383 	/* RELEASE_MEM - flush caches, send int */
5384 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5385 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5386 				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
5387 				 PACKET3_RELEASE_MEM_GCR_GL2_INV |
5388 				 PACKET3_RELEASE_MEM_GCR_GL2_US |
5389 				 PACKET3_RELEASE_MEM_GCR_GL1_INV |
5390 				 PACKET3_RELEASE_MEM_GCR_GLV_INV |
5391 				 PACKET3_RELEASE_MEM_GCR_GLM_INV |
5392 				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
5393 				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5394 				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5395 				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5396 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5397 				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5398 
5399 	/*
5400 	 * the address should be Qword aligned if 64bit write, Dword
5401 	 * aligned if only send 32bit data low (discard data high)
5402 	 */
5403 	if (write64bit)
5404 		BUG_ON(addr & 0x7);
5405 	else
5406 		BUG_ON(addr & 0x3);
5407 	amdgpu_ring_write(ring, lower_32_bits(addr));
5408 	amdgpu_ring_write(ring, upper_32_bits(addr));
5409 	amdgpu_ring_write(ring, lower_32_bits(seq));
5410 	amdgpu_ring_write(ring, upper_32_bits(seq));
5411 	amdgpu_ring_write(ring, ring->is_mes_queue ?
5412 			 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5413 }
5414 
5415 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5416 {
5417 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5418 	uint32_t seq = ring->fence_drv.sync_seq;
5419 	uint64_t addr = ring->fence_drv.gpu_addr;
5420 
5421 	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5422 			       upper_32_bits(addr), seq, 0xffffffff, 4);
5423 }
5424 
5425 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5426 				   uint16_t pasid, uint32_t flush_type,
5427 				   bool all_hub, uint8_t dst_sel)
5428 {
5429 	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5430 	amdgpu_ring_write(ring,
5431 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5432 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5433 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5434 			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5435 }
5436 
5437 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5438 					 unsigned vmid, uint64_t pd_addr)
5439 {
5440 	if (ring->is_mes_queue)
5441 		gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5442 	else
5443 		amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5444 
5445 	/* compute doesn't have PFP */
5446 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5447 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5448 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5449 		amdgpu_ring_write(ring, 0x0);
5450 	}
5451 }
5452 
5453 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5454 					  u64 seq, unsigned int flags)
5455 {
5456 	struct amdgpu_device *adev = ring->adev;
5457 
5458 	/* we only allocate 32bit for each seq wb address */
5459 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5460 
5461 	/* write fence seq to the "addr" */
5462 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5463 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5464 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5465 	amdgpu_ring_write(ring, lower_32_bits(addr));
5466 	amdgpu_ring_write(ring, upper_32_bits(addr));
5467 	amdgpu_ring_write(ring, lower_32_bits(seq));
5468 
5469 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5470 		/* set register to trigger INT */
5471 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5472 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5473 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5474 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5475 		amdgpu_ring_write(ring, 0);
5476 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5477 	}
5478 }
5479 
5480 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5481 					 uint32_t flags)
5482 {
5483 	uint32_t dw2 = 0;
5484 
	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5486 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5487 		/* set load_global_config & load_global_uconfig */
5488 		dw2 |= 0x8001;
5489 		/* set load_cs_sh_regs */
5490 		dw2 |= 0x01000000;
5491 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5492 		dw2 |= 0x10002;
5493 	}
5494 
5495 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5496 	amdgpu_ring_write(ring, dw2);
5497 	amdgpu_ring_write(ring, 0);
5498 }
5499 
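/*
 * Emit a COND_EXEC packet over the ring's cond_exe buffer and return the
 * offset of the placeholder count DW (0x55aa55aa) so that
 * gfx_v11_0_ring_emit_patch_cond_exec() can fill in how many DWs to skip.
 */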
5500 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5501 {
5502 	unsigned ret;
5503 
5504 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5505 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5506 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5508 	ret = ring->wptr & ring->buf_mask;
5509 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5510 
5511 	return ret;
5512 }
5513 
5514 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5515 {
5516 	unsigned cur;
5517 	BUG_ON(offset > ring->buf_mask);
5518 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5519 
5520 	cur = (ring->wptr - 1) & ring->buf_mask;
5521 	if (likely(cur > offset))
5522 		ring->ring[offset] = cur - offset;
5523 	else
5524 		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
5525 }
5526 
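/*
 * Preempt the IBs on a gfx ring: assert the preemption cond_exec, have
 * the KIQ issue PREEMPT_QUEUES_NO_UNMAP with a trailing fence, poll that
 * fence for up to usec_timeout, then deassert the condition again.
 */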
5527 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5528 {
5529 	int i, r = 0;
5530 	struct amdgpu_device *adev = ring->adev;
5531 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5532 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5533 	unsigned long flags;
5534 
5535 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5536 		return -EINVAL;
5537 
5538 	spin_lock_irqsave(&kiq->ring_lock, flags);
5539 
5540 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5541 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5542 		return -ENOMEM;
5543 	}
5544 
5545 	/* assert preemption condition */
5546 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5547 
5548 	/* assert IB preemption, emit the trailing fence */
5549 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5550 				   ring->trail_fence_gpu_addr,
5551 				   ++ring->trail_seq);
5552 	amdgpu_ring_commit(kiq_ring);
5553 
5554 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5555 
5556 	/* poll the trailing fence */
5557 	for (i = 0; i < adev->usec_timeout; i++) {
5558 		if (ring->trail_seq ==
5559 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
5560 			break;
5561 		udelay(1);
5562 	}
5563 
5564 	if (i >= adev->usec_timeout) {
5565 		r = -EINVAL;
5566 		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
5567 	}
5568 
5569 	/* deassert preemption condition */
5570 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5571 	return r;
5572 }
5573 
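/*
 * Emit the DE metadata payload used for gfx preemption via WRITE_DATA.
 * The payload and GDS backup address live in the MES context for MES
 * queues or in the CSA otherwise; on resume the previously saved payload
 * is replayed from its CPU copy.
 */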
5574 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5575 {
5576 	struct amdgpu_device *adev = ring->adev;
5577 	struct v10_de_ib_state de_payload = {0};
5578 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5579 	void *de_payload_cpu_addr;
5580 	int cnt;
5581 
5582 	if (ring->is_mes_queue) {
5583 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5584 				  gfx[0].gfx_meta_data) +
5585 			offsetof(struct v10_gfx_meta_data, de_payload);
5586 		de_payload_gpu_addr =
5587 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5588 		de_payload_cpu_addr =
5589 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5590 
5591 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5592 				  gfx[0].gds_backup) +
5593 			offsetof(struct v10_gfx_meta_data, de_payload);
5594 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5595 	} else {
5596 		offset = offsetof(struct v10_gfx_meta_data, de_payload);
5597 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5598 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5599 
5600 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5601 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5602 				 PAGE_SIZE);
5603 	}
5604 
5605 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5606 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5607 
5608 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5609 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5610 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5611 				 WRITE_DATA_DST_SEL(8) |
5612 				 WR_CONFIRM) |
5613 				 WRITE_DATA_CACHE_POLICY(0));
5614 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5615 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5616 
5617 	if (resume)
5618 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5619 					   sizeof(de_payload) >> 2);
5620 	else
5621 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5622 					   sizeof(de_payload) >> 2);
5623 }
5624 
5625 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5626 				    bool secure)
5627 {
5628 	uint32_t v = secure ? FRAME_TMZ : 0;
5629 
5630 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5631 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5632 }
5633 
5634 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5635 				     uint32_t reg_val_offs)
5636 {
5637 	struct amdgpu_device *adev = ring->adev;
5638 
5639 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5640 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5641 				(5 << 8) |	/* dst: memory */
5642 				(1 << 20));	/* write confirm */
5643 	amdgpu_ring_write(ring, reg);
5644 	amdgpu_ring_write(ring, 0);
5645 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5646 				reg_val_offs * 4));
5647 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5648 				reg_val_offs * 4));
5649 }
5650 
5651 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5652 				   uint32_t val)
5653 {
5654 	uint32_t cmd = 0;
5655 
5656 	switch (ring->funcs->type) {
5657 	case AMDGPU_RING_TYPE_GFX:
5658 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5659 		break;
5660 	case AMDGPU_RING_TYPE_KIQ:
5661 		cmd = (1 << 16); /* no inc addr */
5662 		break;
5663 	default:
5664 		cmd = WR_CONFIRM;
5665 		break;
5666 	}
5667 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5668 	amdgpu_ring_write(ring, cmd);
5669 	amdgpu_ring_write(ring, reg);
5670 	amdgpu_ring_write(ring, 0);
5671 	amdgpu_ring_write(ring, val);
5672 }
5673 
5674 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5675 					uint32_t val, uint32_t mask)
5676 {
5677 	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5678 }
5679 
5680 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5681 						   uint32_t reg0, uint32_t reg1,
5682 						   uint32_t ref, uint32_t mask)
5683 {
5684 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5685 
5686 	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5687 			       ref, mask, 0x20);
5688 }
5689 
5690 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
5691 					 unsigned vmid)
5692 {
5693 	struct amdgpu_device *adev = ring->adev;
5694 	uint32_t value = 0;
5695 
5696 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5697 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5698 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5699 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5700 	WREG32_SOC15(GC, 0, regSQ_CMD, value);
5701 }
5702 
5703 static void
5704 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5705 				      uint32_t me, uint32_t pipe,
5706 				      enum amdgpu_interrupt_state state)
5707 {
5708 	uint32_t cp_int_cntl, cp_int_cntl_reg;
5709 
5710 	if (!me) {
5711 		switch (pipe) {
5712 		case 0:
5713 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
5714 			break;
5715 		case 1:
5716 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
5717 			break;
5718 		default:
5719 			DRM_DEBUG("invalid pipe %d\n", pipe);
5720 			return;
5721 		}
5722 	} else {
5723 		DRM_DEBUG("invalid me %d\n", me);
5724 		return;
5725 	}
5726 
5727 	switch (state) {
5728 	case AMDGPU_IRQ_STATE_DISABLE:
5729 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5730 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5731 					    TIME_STAMP_INT_ENABLE, 0);
5732 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5733 					    GENERIC0_INT_ENABLE, 0);
5734 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5735 		break;
5736 	case AMDGPU_IRQ_STATE_ENABLE:
5737 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5738 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5739 					    TIME_STAMP_INT_ENABLE, 1);
5740 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5741 					    GENERIC0_INT_ENABLE, 1);
5742 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5743 		break;
5744 	default:
5745 		break;
5746 	}
5747 }
5748 
5749 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5750 						     int me, int pipe,
5751 						     enum amdgpu_interrupt_state state)
5752 {
5753 	u32 mec_int_cntl, mec_int_cntl_reg;
5754 
5755 	/*
5756 	 * amdgpu controls only the first MEC. That's why this function only
5757 	 * handles the setting of interrupts for this specific MEC. All other
5758 	 * pipes' interrupts are set by amdkfd.
5759 	 */
5760 
5761 	if (me == 1) {
5762 		switch (pipe) {
5763 		case 0:
5764 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5765 			break;
5766 		case 1:
5767 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
5768 			break;
5769 		case 2:
5770 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
5771 			break;
5772 		case 3:
5773 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
5774 			break;
5775 		default:
5776 			DRM_DEBUG("invalid pipe %d\n", pipe);
5777 			return;
5778 		}
5779 	} else {
5780 		DRM_DEBUG("invalid me %d\n", me);
5781 		return;
5782 	}
5783 
5784 	switch (state) {
5785 	case AMDGPU_IRQ_STATE_DISABLE:
5786 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5787 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5788 					     TIME_STAMP_INT_ENABLE, 0);
5789 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5790 					     GENERIC0_INT_ENABLE, 0);
5791 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5792 		break;
5793 	case AMDGPU_IRQ_STATE_ENABLE:
5794 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5795 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5796 					     TIME_STAMP_INT_ENABLE, 1);
5797 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5798 					     GENERIC0_INT_ENABLE, 1);
5799 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5800 		break;
5801 	default:
5802 		break;
5803 	}
5804 }
5805 
5806 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5807 					    struct amdgpu_irq_src *src,
5808 					    unsigned type,
5809 					    enum amdgpu_interrupt_state state)
5810 {
5811 	switch (type) {
5812 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5813 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
5814 		break;
5815 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
5816 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
5817 		break;
5818 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5819 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5820 		break;
5821 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5822 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5823 		break;
5824 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5825 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5826 		break;
5827 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5828 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5829 		break;
5830 	default:
5831 		break;
5832 	}
5833 	return 0;
5834 }
5835 
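/*
 * CP EOP interrupt handler: for MES managed queues look the queue up by
 * the id in src_data[0] and process its fences, otherwise decode
 * me/pipe/queue from ring_id and process the matching gfx or compute ring.
 */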
5836 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
5837 			     struct amdgpu_irq_src *source,
5838 			     struct amdgpu_iv_entry *entry)
5839 {
5840 	int i;
5841 	u8 me_id, pipe_id, queue_id;
5842 	struct amdgpu_ring *ring;
5843 	uint32_t mes_queue_id = entry->src_data[0];
5844 
5845 	DRM_DEBUG("IH: CP EOP\n");
5846 
5847 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
5848 		struct amdgpu_mes_queue *queue;
5849 
5850 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
5851 
5852 		spin_lock(&adev->mes.queue_id_lock);
5853 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
5854 		if (queue) {
5855 			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
5856 			amdgpu_fence_process(queue->ring);
5857 		}
5858 		spin_unlock(&adev->mes.queue_id_lock);
5859 	} else {
5860 		me_id = (entry->ring_id & 0x0c) >> 2;
5861 		pipe_id = (entry->ring_id & 0x03) >> 0;
5862 		queue_id = (entry->ring_id & 0x70) >> 4;
5863 
5864 		switch (me_id) {
5865 		case 0:
5866 			if (pipe_id == 0)
5867 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5868 			else
5869 				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
5870 			break;
5871 		case 1:
5872 		case 2:
5873 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5874 				ring = &adev->gfx.compute_ring[i];
5875 				/* Per-queue interrupt is supported for MEC starting from VI.
5876 				 * The interrupt can only be enabled/disabled per pipe instead
5877 				 * of per queue.
5878 				 */
5879 				if ((ring->me == me_id) &&
5880 				    (ring->pipe == pipe_id) &&
5881 				    (ring->queue == queue_id))
5882 					amdgpu_fence_process(ring);
5883 			}
5884 			break;
5885 		}
5886 	}
5887 
5888 	return 0;
5889 }
5890 
5891 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5892 					      struct amdgpu_irq_src *source,
5893 					      unsigned type,
5894 					      enum amdgpu_interrupt_state state)
5895 {
5896 	switch (state) {
5897 	case AMDGPU_IRQ_STATE_DISABLE:
5898 	case AMDGPU_IRQ_STATE_ENABLE:
5899 		WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5900 			       PRIV_REG_INT_ENABLE,
5901 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5902 		break;
5903 	default:
5904 		break;
5905 	}
5906 
5907 	return 0;
5908 }
5909 
5910 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5911 					       struct amdgpu_irq_src *source,
5912 					       unsigned type,
5913 					       enum amdgpu_interrupt_state state)
5914 {
5915 	switch (state) {
5916 	case AMDGPU_IRQ_STATE_DISABLE:
5917 	case AMDGPU_IRQ_STATE_ENABLE:
5918 		WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5919 			       PRIV_INSTR_INT_ENABLE,
5920 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5921 		break;
5922 	default:
5923 		break;
5924 	}
5925 
5926 	return 0;
5927 }
5928 
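/*
 * Report a priv reg/instruction fault to the scheduler of whichever ring
 * matches the me/pipe/queue decoded from the IV ring_id.
 */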
5929 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
5930 					struct amdgpu_iv_entry *entry)
5931 {
5932 	u8 me_id, pipe_id, queue_id;
5933 	struct amdgpu_ring *ring;
5934 	int i;
5935 
5936 	me_id = (entry->ring_id & 0x0c) >> 2;
5937 	pipe_id = (entry->ring_id & 0x03) >> 0;
5938 	queue_id = (entry->ring_id & 0x70) >> 4;
5939 
5940 	switch (me_id) {
5941 	case 0:
5942 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
5943 			ring = &adev->gfx.gfx_ring[i];
			/* we only enable 1 gfx queue per pipe for now */
5945 			if (ring->me == me_id && ring->pipe == pipe_id)
5946 				drm_sched_fault(&ring->sched);
5947 		}
5948 		break;
5949 	case 1:
5950 	case 2:
5951 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5952 			ring = &adev->gfx.compute_ring[i];
5953 			if (ring->me == me_id && ring->pipe == pipe_id &&
5954 			    ring->queue == queue_id)
5955 				drm_sched_fault(&ring->sched);
5956 		}
5957 		break;
5958 	default:
5959 		BUG();
5960 		break;
5961 	}
5962 }
5963 
5964 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
5965 				  struct amdgpu_irq_src *source,
5966 				  struct amdgpu_iv_entry *entry)
5967 {
5968 	DRM_ERROR("Illegal register access in command stream\n");
5969 	gfx_v11_0_handle_priv_fault(adev, entry);
5970 	return 0;
5971 }
5972 
5973 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
5974 				   struct amdgpu_irq_src *source,
5975 				   struct amdgpu_iv_entry *entry)
5976 {
5977 	DRM_ERROR("Illegal instruction in command stream\n");
5978 	gfx_v11_0_handle_priv_fault(adev, entry);
5979 	return 0;
5980 }
5981 
5982 #if 0
5983 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
5984 					     struct amdgpu_irq_src *src,
5985 					     unsigned int type,
5986 					     enum amdgpu_interrupt_state state)
5987 {
5988 	uint32_t tmp, target;
5989 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
5990 
5991 	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5992 	target += ring->pipe;
5993 
5994 	switch (type) {
5995 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
5996 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
5997 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
5998 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
5999 					    GENERIC2_INT_ENABLE, 0);
6000 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6001 
6002 			tmp = RREG32_SOC15_IP(GC, target);
6003 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6004 					    GENERIC2_INT_ENABLE, 0);
6005 			WREG32_SOC15_IP(GC, target, tmp);
6006 		} else {
6007 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6008 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6009 					    GENERIC2_INT_ENABLE, 1);
6010 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6011 
6012 			tmp = RREG32_SOC15_IP(GC, target);
6013 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6014 					    GENERIC2_INT_ENABLE, 1);
6015 			WREG32_SOC15_IP(GC, target, tmp);
6016 		}
6017 		break;
6018 	default:
		BUG(); /* kiq only supports GENERIC2_INT now */
6020 		break;
6021 	}
6022 	return 0;
6023 }
6024 #endif
6025 
6026 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6027 {
6028 	const unsigned int gcr_cntl =
6029 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6030 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6031 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6032 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6033 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6034 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6035 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6036 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6037 
6038 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6039 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6040 	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6041 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6042 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6043 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6044 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6045 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6046 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6047 }
6048 
6049 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6050 	.name = "gfx_v11_0",
6051 	.early_init = gfx_v11_0_early_init,
6052 	.late_init = gfx_v11_0_late_init,
6053 	.sw_init = gfx_v11_0_sw_init,
6054 	.sw_fini = gfx_v11_0_sw_fini,
6055 	.hw_init = gfx_v11_0_hw_init,
6056 	.hw_fini = gfx_v11_0_hw_fini,
6057 	.suspend = gfx_v11_0_suspend,
6058 	.resume = gfx_v11_0_resume,
6059 	.is_idle = gfx_v11_0_is_idle,
6060 	.wait_for_idle = gfx_v11_0_wait_for_idle,
6061 	.soft_reset = gfx_v11_0_soft_reset,
6062 	.check_soft_reset = gfx_v11_0_check_soft_reset,
6063 	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
6064 	.set_powergating_state = gfx_v11_0_set_powergating_state,
6065 	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
6066 };
6067 
6068 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6069 	.type = AMDGPU_RING_TYPE_GFX,
6070 	.align_mask = 0xff,
6071 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6072 	.support_64bit_ptrs = true,
6073 	.secure_submission_supported = true,
6074 	.vmhub = AMDGPU_GFXHUB_0,
6075 	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6076 	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6077 	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6078 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6079 		5 + /* COND_EXEC */
6080 		7 + /* PIPELINE_SYNC */
6081 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6082 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6083 		2 + /* VM_FLUSH */
6084 		8 + /* FENCE for VM_FLUSH */
6085 		20 + /* GDS switch */
6086 		5 + /* COND_EXEC */
6087 		7 + /* HDP_flush */
6088 		4 + /* VGT_flush */
6089 		31 + /*	DE_META */
6090 		3 + /* CNTX_CTRL */
6091 		5 + /* HDP_INVL */
6092 		8 + 8 + /* FENCE x2 */
6093 		8, /* gfx_v11_0_emit_mem_sync */
6094 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
6095 	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6096 	.emit_fence = gfx_v11_0_ring_emit_fence,
6097 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6098 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6099 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6100 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6101 	.test_ring = gfx_v11_0_ring_test_ring,
6102 	.test_ib = gfx_v11_0_ring_test_ib,
6103 	.insert_nop = amdgpu_ring_insert_nop,
6104 	.pad_ib = amdgpu_ring_generic_pad_ib,
6105 	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6106 	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6107 	.patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
6108 	.preempt_ib = gfx_v11_0_ring_preempt_ib,
6109 	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6110 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6111 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6112 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6113 	.soft_recovery = gfx_v11_0_ring_soft_recovery,
6114 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6115 };
6116 
6117 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6118 	.type = AMDGPU_RING_TYPE_COMPUTE,
6119 	.align_mask = 0xff,
6120 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6121 	.support_64bit_ptrs = true,
6122 	.vmhub = AMDGPU_GFXHUB_0,
6123 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6124 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6125 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6126 	.emit_frame_size =
6127 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6128 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6129 		5 + /* hdp invalidate */
6130 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6131 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6132 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6133 		2 + /* gfx_v11_0_ring_emit_vm_flush */
6134 		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6135 		8, /* gfx_v11_0_emit_mem_sync */
6136 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6137 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6138 	.emit_fence = gfx_v11_0_ring_emit_fence,
6139 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6140 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6141 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6142 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6143 	.test_ring = gfx_v11_0_ring_test_ring,
6144 	.test_ib = gfx_v11_0_ring_test_ib,
6145 	.insert_nop = amdgpu_ring_insert_nop,
6146 	.pad_ib = amdgpu_ring_generic_pad_ib,
6147 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6148 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6149 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6150 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6151 };
6152 
6153 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6154 	.type = AMDGPU_RING_TYPE_KIQ,
6155 	.align_mask = 0xff,
6156 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6157 	.support_64bit_ptrs = true,
6158 	.vmhub = AMDGPU_GFXHUB_0,
6159 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6160 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6161 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6162 	.emit_frame_size =
6163 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6164 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6165 		5 + /*hdp invalidate */
6166 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6167 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6168 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6169 		2 + /* gfx_v11_0_ring_emit_vm_flush */
6170 		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6171 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6172 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6173 	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6174 	.test_ring = gfx_v11_0_ring_test_ring,
6175 	.test_ib = gfx_v11_0_ring_test_ib,
6176 	.insert_nop = amdgpu_ring_insert_nop,
6177 	.pad_ib = amdgpu_ring_generic_pad_ib,
6178 	.emit_rreg = gfx_v11_0_ring_emit_rreg,
6179 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6180 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6181 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6182 };
6183 
6184 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6185 {
6186 	int i;
6187 
6188 	adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6189 
6190 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6191 		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6192 
6193 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6194 		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6195 }
6196 
6197 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6198 	.set = gfx_v11_0_set_eop_interrupt_state,
6199 	.process = gfx_v11_0_eop_irq,
6200 };
6201 
6202 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6203 	.set = gfx_v11_0_set_priv_reg_fault_state,
6204 	.process = gfx_v11_0_priv_reg_irq,
6205 };
6206 
6207 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
6208 	.set = gfx_v11_0_set_priv_inst_fault_state,
6209 	.process = gfx_v11_0_priv_inst_irq,
6210 };
6211 
6212 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
6213 {
6214 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6215 	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
6216 
6217 	adev->gfx.priv_reg_irq.num_types = 1;
6218 	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
6219 
6220 	adev->gfx.priv_inst_irq.num_types = 1;
6221 	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
6222 }
6223 
6224 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
6225 {
6226 	if (adev->flags & AMD_IS_APU)
6227 		adev->gfx.imu.mode = MISSION_MODE;
6228 	else
6229 		adev->gfx.imu.mode = DEBUG_MODE;
6230 
6231 	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
6232 }
6233 
6234 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
6235 {
6236 	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
6237 }
6238 
6239 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
6240 {
6241 	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
6242 			    adev->gfx.config.max_sh_per_se *
6243 			    adev->gfx.config.max_shader_engines;
6244 
6245 	adev->gds.gds_size = 0x1000;
6246 	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
6247 	adev->gds.gws_size = 64;
6248 	adev->gds.oa_size = 16;
6249 }
6250 
6251 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
6252 {
6253 	/* set gfx eng mqd */
6254 	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
6255 		sizeof(struct v11_gfx_mqd);
6256 	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
6257 		gfx_v11_0_gfx_mqd_init;
6258 	/* set compute eng mqd */
6259 	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
6260 		sizeof(struct v11_compute_mqd);
6261 	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
6262 		gfx_v11_0_compute_mqd_init;
6263 }
6264 
6265 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
6266 							  u32 bitmap)
6267 {
6268 	u32 data;
6269 
6270 	if (!bitmap)
6271 		return;
6272 
6273 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6274 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6275 
6276 	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
6277 }
6278 
6279 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
6280 {
6281 	u32 data, wgp_bitmask;
6282 	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
6283 	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
6284 
6285 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6286 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6287 
6288 	wgp_bitmask =
6289 		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6290 
6291 	return (~data) & wgp_bitmask;
6292 }
6293 
6294 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
6295 {
6296 	u32 wgp_idx, wgp_active_bitmap;
6297 	u32 cu_bitmap_per_wgp, cu_active_bitmap;
6298 
6299 	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
6300 	cu_active_bitmap = 0;
6301 
6302 	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* each enabled WGP enables both of its 2 CUs */
6304 		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
6305 		if (wgp_active_bitmap & (1 << wgp_idx))
6306 			cu_active_bitmap |= cu_bitmap_per_wgp;
6307 	}
6308 
6309 	return cu_active_bitmap;
6310 }
6311 
6312 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
6313 				 struct amdgpu_cu_info *cu_info)
6314 {
6315 	int i, j, k, counter, active_cu_number = 0;
6316 	u32 mask, bitmap;
6317 	unsigned disable_masks[8 * 2];
6318 
6319 	if (!adev || !cu_info)
6320 		return -EINVAL;
6321 
6322 	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
6323 
6324 	mutex_lock(&adev->grbm_idx_mutex);
6325 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6326 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6327 			mask = 1;
6328 			counter = 0;
6329 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
6330 			if (i < 8 && j < 2)
6331 				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
6332 					adev, disable_masks[i * 2 + j]);
6333 			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
6334 
6335 			/**
6336 			 * GFX11 could support more than 4 SEs, while the bitmap
6337 			 * in cu_info struct is 4x4 and ioctl interface struct
6338 			 * drm_amdgpu_info_device should keep stable.
6339 			 * So we use last two columns of bitmap to store cu mask for
6340 			 * SEs 4 to 7, the layout of the bitmap is as below:
6341 			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
6342 			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
6343 			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
6344 			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
6345 			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
6346 			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
6347 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
6348 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
6349 			 */
6350 			cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
6351 
6352 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6353 				if (bitmap & mask)
6354 					counter++;
6355 
6356 				mask <<= 1;
6357 			}
6358 			active_cu_number += counter;
6359 		}
6360 	}
6361 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6362 	mutex_unlock(&adev->grbm_idx_mutex);
6363 
6364 	cu_info->number = active_cu_number;
6365 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6366 
6367 	return 0;
6368 }
6369 
6370 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
6371 {
6372 	.type = AMD_IP_BLOCK_TYPE_GFX,
6373 	.major = 11,
6374 	.minor = 0,
6375 	.rev = 0,
6376 	.funcs = &gfx_v11_0_ip_funcs,
6377 };
6378