xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c (revision 25b892b5)
1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/firmware.h>
25 
26 #include "amdgpu.h"
27 #include "amdgpu_vcn.h"
28 #include "amdgpu_pm.h"
29 #include "soc15.h"
30 #include "soc15d.h"
31 #include "vcn_v2_0.h"
32 #include "mmsch_v1_0.h"
33 
34 #include "vcn/vcn_2_5_offset.h"
35 #include "vcn/vcn_2_5_sh_mask.h"
36 #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
37 
38 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET			0x27
39 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET			0x0f
40 #define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET			0x10
41 #define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET			0x11
42 #define mmUVD_NO_OP_INTERNAL_OFFSET				0x29
43 #define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET			0x66
44 #define mmUVD_SCRATCH9_INTERNAL_OFFSET				0xc01d
45 
46 #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET			0x431
47 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x3b4
48 #define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x3b5
49 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x25c
50 
51 #define VCN25_MAX_HW_INSTANCES_ARCTURUS			2
52 
53 static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
54 static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
55 static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
56 static int vcn_v2_5_set_powergating_state(void *handle,
57 				enum amd_powergating_state state);
58 static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
59 				int inst_idx, struct dpg_pause_state *new_state);
60 static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
61 
62 static int amdgpu_ih_clientid_vcns[] = {
63 	SOC15_IH_CLIENTID_VCN,
64 	SOC15_IH_CLIENTID_VCN1
65 };
66 
67 /**
68  * vcn_v2_5_early_init - set function pointers
69  *
70  * @handle: amdgpu_device pointer
71  *
72  * Set ring and irq function pointers
73  */
74 static int vcn_v2_5_early_init(void *handle)
75 {
76 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
77 
78 	if (amdgpu_sriov_vf(adev)) {
79 		adev->vcn.num_vcn_inst = 2;
80 		adev->vcn.harvest_config = 0;
81 		adev->vcn.num_enc_rings = 1;
82 	} else {
83 		u32 harvest;
84 		int i;
85 		adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS;
86 		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
87 			harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
88 			if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
89 				adev->vcn.harvest_config |= 1 << i;
90 		}
91 		if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
92 					AMDGPU_VCN_HARVEST_VCN1))
93 			/* both instances are harvested, disable the block */
94 			return -ENOENT;
95 
96 		adev->vcn.num_enc_rings = 2;
97 	}
98 
99 	vcn_v2_5_set_dec_ring_funcs(adev);
100 	vcn_v2_5_set_enc_ring_funcs(adev);
101 	vcn_v2_5_set_irq_funcs(adev);
102 
103 	return 0;
104 }
105 
106 /**
107  * vcn_v2_5_sw_init - sw init for VCN block
108  *
109  * @handle: amdgpu_device pointer
110  *
111  * Load firmware and sw initialization
112  */
113 static int vcn_v2_5_sw_init(void *handle)
114 {
115 	struct amdgpu_ring *ring;
116 	int i, j, r;
117 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
118 
119 	for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
120 		if (adev->vcn.harvest_config & (1 << j))
121 			continue;
122 		/* VCN DEC TRAP */
123 		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
124 				VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq);
125 		if (r)
126 			return r;
127 
128 		/* VCN ENC TRAP */
129 		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
130 			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
131 				i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
132 			if (r)
133 				return r;
134 		}
135 	}
136 
137 	r = amdgpu_vcn_sw_init(adev);
138 	if (r)
139 		return r;
140 
141 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
142 		const struct common_firmware_header *hdr;
143 		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
144 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
145 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
146 		adev->firmware.fw_size +=
147 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
148 
149 		if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) {
150 			adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1;
151 			adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw;
152 			adev->firmware.fw_size +=
153 				ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
154 		}
155 		dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
156 	}
157 
158 	r = amdgpu_vcn_resume(adev);
159 	if (r)
160 		return r;
161 
162 	for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
163 		volatile struct amdgpu_fw_shared *fw_shared;
164 
165 		if (adev->vcn.harvest_config & (1 << j))
166 			continue;
167 		adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
168 		adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
169 		adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
170 		adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
171 		adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
172 		adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
173 
174 		adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
175 		adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9);
176 		adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
177 		adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0);
178 		adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
179 		adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1);
180 		adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
181 		adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD);
182 		adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
183 		adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP);
184 
185 		ring = &adev->vcn.inst[j].ring_dec;
186 		ring->use_doorbell = true;
187 
188 		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
189 				(amdgpu_sriov_vf(adev) ? 2*j : 8*j);
190 		sprintf(ring->name, "vcn_dec_%d", j);
191 		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq,
192 				     0, AMDGPU_RING_PRIO_DEFAULT, NULL);
193 		if (r)
194 			return r;
195 
196 		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
197 			ring = &adev->vcn.inst[j].ring_enc[i];
198 			ring->use_doorbell = true;
199 
200 			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
201 					(amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
202 
203 			sprintf(ring->name, "vcn_enc_%d.%d", j, i);
204 			r = amdgpu_ring_init(adev, ring, 512,
205 					     &adev->vcn.inst[j].irq, 0,
206 					     AMDGPU_RING_PRIO_DEFAULT, NULL);
207 			if (r)
208 				return r;
209 		}
210 
211 		fw_shared = adev->vcn.inst[j].fw_shared_cpu_addr;
212 		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
213 	}
214 
215 	if (amdgpu_sriov_vf(adev)) {
216 		r = amdgpu_virt_alloc_mm_table(adev);
217 		if (r)
218 			return r;
219 	}
220 
221 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
222 		adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
223 
224 	return 0;
225 }
226 
227 /**
228  * vcn_v2_5_sw_fini - sw fini for VCN block
229  *
230  * @handle: amdgpu_device pointer
231  *
232  * VCN suspend and free up sw allocation
233  */
234 static int vcn_v2_5_sw_fini(void *handle)
235 {
236 	int i, r;
237 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
238 	volatile struct amdgpu_fw_shared *fw_shared;
239 
240 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
241 		if (adev->vcn.harvest_config & (1 << i))
242 			continue;
243 		fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
244 		fw_shared->present_flag_0 = 0;
245 	}
246 
247 	if (amdgpu_sriov_vf(adev))
248 		amdgpu_virt_free_mm_table(adev);
249 
250 	r = amdgpu_vcn_suspend(adev);
251 	if (r)
252 		return r;
253 
254 	r = amdgpu_vcn_sw_fini(adev);
255 
256 	return r;
257 }
258 
259 /**
260  * vcn_v2_5_hw_init - start and test VCN block
261  *
262  * @handle: amdgpu_device pointer
263  *
264  * Initialize the hardware, boot up the VCPU and do some testing
265  */
266 static int vcn_v2_5_hw_init(void *handle)
267 {
268 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
269 	struct amdgpu_ring *ring;
270 	int i, j, r = 0;
271 
272 	if (amdgpu_sriov_vf(adev))
273 		r = vcn_v2_5_sriov_start(adev);
274 
275 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
276 		if (adev->vcn.harvest_config & (1 << j))
277 			continue;
278 
279 		if (amdgpu_sriov_vf(adev)) {
280 			adev->vcn.inst[j].ring_enc[0].sched.ready = true;
281 			adev->vcn.inst[j].ring_enc[1].sched.ready = false;
282 			adev->vcn.inst[j].ring_enc[2].sched.ready = false;
283 			adev->vcn.inst[j].ring_dec.sched.ready = true;
284 		} else {
285 
286 			ring = &adev->vcn.inst[j].ring_dec;
287 
288 			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
289 						     ring->doorbell_index, j);
290 
291 			r = amdgpu_ring_test_helper(ring);
292 			if (r)
293 				goto done;
294 
295 			for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
296 				ring = &adev->vcn.inst[j].ring_enc[i];
297 				r = amdgpu_ring_test_helper(ring);
298 				if (r)
299 					goto done;
300 			}
301 		}
302 	}
303 
304 done:
305 	if (!r)
306 		DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
307 			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
308 
309 	return r;
310 }
311 
312 /**
313  * vcn_v2_5_hw_fini - stop the hardware block
314  *
315  * @handle: amdgpu_device pointer
316  *
317  * Stop the VCN block, mark ring as not ready any more
318  */
319 static int vcn_v2_5_hw_fini(void *handle)
320 {
321 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
322 	int i;
323 
324 	cancel_delayed_work_sync(&adev->vcn.idle_work);
325 
326 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
327 		if (adev->vcn.harvest_config & (1 << i))
328 			continue;
329 
330 		if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
331 		    (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
332 		     RREG32_SOC15(VCN, i, mmUVD_STATUS)))
333 			vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
334 	}
335 
336 	return 0;
337 }
338 
/**
 * vcn_v2_5_suspend - suspend VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Run hw_fini first and, if that succeeded, the common VCN suspend.
 *
 * Returns 0 on success or the error of the step that failed.
 */
static int vcn_v2_5_suspend(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret = vcn_v2_5_hw_fini(adev);

	if (!ret)
		ret = amdgpu_vcn_suspend(adev);

	return ret;
}
359 
/**
 * vcn_v2_5_resume - resume VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Run the common VCN resume and, if that succeeded, hw_init.
 *
 * Returns 0 on success or the error of the step that failed.
 */
static int vcn_v2_5_resume(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret = amdgpu_vcn_resume(adev);

	if (!ret)
		ret = vcn_v2_5_hw_init(adev);

	return ret;
}
380 
381 /**
382  * vcn_v2_5_mc_resume - memory controller programming
383  *
384  * @adev: amdgpu_device pointer
385  *
386  * Let the VCN memory controller know it's offsets
387  */
388 static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
389 {
390 	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
391 	uint32_t offset;
392 	int i;
393 
394 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
395 		if (adev->vcn.harvest_config & (1 << i))
396 			continue;
397 		/* cache window 0: fw */
398 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
399 			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
400 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
401 			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
402 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
403 			WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
404 			offset = 0;
405 		} else {
406 			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
407 				lower_32_bits(adev->vcn.inst[i].gpu_addr));
408 			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
409 				upper_32_bits(adev->vcn.inst[i].gpu_addr));
410 			offset = size;
411 			WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
412 				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
413 		}
414 		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);
415 
416 		/* cache window 1: stack */
417 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
418 			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
419 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
420 			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
421 		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
422 		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
423 
424 		/* cache window 2: context */
425 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
426 			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
427 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
428 			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
429 		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
430 		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
431 
432 		/* non-cache window */
433 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
434 			lower_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
435 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
436 			upper_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
437 		WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
438 		WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
439 			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
440 	}
441 }
442 
443 static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
444 {
445 	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
446 	uint32_t offset;
447 
448 	/* cache window 0: fw */
449 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
450 		if (!indirect) {
451 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
452 				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
453 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
454 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
455 				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
456 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
457 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
458 				VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
459 		} else {
460 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
461 				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
462 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
463 				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
464 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
465 				VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
466 		}
467 		offset = 0;
468 	} else {
469 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
470 			VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
471 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
472 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
473 			VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
474 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
475 		offset = size;
476 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
477 			VCN, 0, mmUVD_VCPU_CACHE_OFFSET0),
478 			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
479 	}
480 
481 	if (!indirect)
482 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
483 			VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
484 	else
485 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
486 			VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
487 
488 	/* cache window 1: stack */
489 	if (!indirect) {
490 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
491 			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
492 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
493 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
494 			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
495 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
496 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
497 			VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
498 	} else {
499 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
500 			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
501 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
502 			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
503 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
504 			VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
505 	}
506 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
507 		VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
508 
509 	/* cache window 2: context */
510 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
511 		VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
512 		lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
513 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
514 		VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
515 		upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
516 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
517 		VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
518 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
519 		VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
520 
521 	/* non-cache window */
522 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
523 		VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
524 		lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
525 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
526 		VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
527 		upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
528 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
529 		VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
530 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
531 		VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0),
532 		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
533 
534 	/* VCN global tiling registers */
535 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
536 		VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
537 }
538 
539 /**
540  * vcn_v2_5_disable_clock_gating - disable VCN clock gating
541  *
542  * @adev: amdgpu_device pointer
543  *
544  * Disable clock gating for VCN block
545  */
546 static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
547 {
548 	uint32_t data;
549 	int i;
550 
551 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
552 		if (adev->vcn.harvest_config & (1 << i))
553 			continue;
554 		/* UVD disable CGC */
555 		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
556 		if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
557 			data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
558 		else
559 			data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
560 		data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
561 		data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
562 		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
563 
564 		data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
565 		data &= ~(UVD_CGC_GATE__SYS_MASK
566 			| UVD_CGC_GATE__UDEC_MASK
567 			| UVD_CGC_GATE__MPEG2_MASK
568 			| UVD_CGC_GATE__REGS_MASK
569 			| UVD_CGC_GATE__RBC_MASK
570 			| UVD_CGC_GATE__LMI_MC_MASK
571 			| UVD_CGC_GATE__LMI_UMC_MASK
572 			| UVD_CGC_GATE__IDCT_MASK
573 			| UVD_CGC_GATE__MPRD_MASK
574 			| UVD_CGC_GATE__MPC_MASK
575 			| UVD_CGC_GATE__LBSI_MASK
576 			| UVD_CGC_GATE__LRBBM_MASK
577 			| UVD_CGC_GATE__UDEC_RE_MASK
578 			| UVD_CGC_GATE__UDEC_CM_MASK
579 			| UVD_CGC_GATE__UDEC_IT_MASK
580 			| UVD_CGC_GATE__UDEC_DB_MASK
581 			| UVD_CGC_GATE__UDEC_MP_MASK
582 			| UVD_CGC_GATE__WCB_MASK
583 			| UVD_CGC_GATE__VCPU_MASK
584 			| UVD_CGC_GATE__MMSCH_MASK);
585 
586 		WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
587 
588 		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0,  0xFFFFFFFF);
589 
590 		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
591 		data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
592 			| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
593 			| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
594 			| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
595 			| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
596 			| UVD_CGC_CTRL__SYS_MODE_MASK
597 			| UVD_CGC_CTRL__UDEC_MODE_MASK
598 			| UVD_CGC_CTRL__MPEG2_MODE_MASK
599 			| UVD_CGC_CTRL__REGS_MODE_MASK
600 			| UVD_CGC_CTRL__RBC_MODE_MASK
601 			| UVD_CGC_CTRL__LMI_MC_MODE_MASK
602 			| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
603 			| UVD_CGC_CTRL__IDCT_MODE_MASK
604 			| UVD_CGC_CTRL__MPRD_MODE_MASK
605 			| UVD_CGC_CTRL__MPC_MODE_MASK
606 			| UVD_CGC_CTRL__LBSI_MODE_MASK
607 			| UVD_CGC_CTRL__LRBBM_MODE_MASK
608 			| UVD_CGC_CTRL__WCB_MODE_MASK
609 			| UVD_CGC_CTRL__VCPU_MODE_MASK
610 			| UVD_CGC_CTRL__MMSCH_MODE_MASK);
611 		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
612 
613 		/* turn on */
614 		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
615 		data |= (UVD_SUVD_CGC_GATE__SRE_MASK
616 			| UVD_SUVD_CGC_GATE__SIT_MASK
617 			| UVD_SUVD_CGC_GATE__SMP_MASK
618 			| UVD_SUVD_CGC_GATE__SCM_MASK
619 			| UVD_SUVD_CGC_GATE__SDB_MASK
620 			| UVD_SUVD_CGC_GATE__SRE_H264_MASK
621 			| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
622 			| UVD_SUVD_CGC_GATE__SIT_H264_MASK
623 			| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
624 			| UVD_SUVD_CGC_GATE__SCM_H264_MASK
625 			| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
626 			| UVD_SUVD_CGC_GATE__SDB_H264_MASK
627 			| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
628 			| UVD_SUVD_CGC_GATE__SCLR_MASK
629 			| UVD_SUVD_CGC_GATE__UVD_SC_MASK
630 			| UVD_SUVD_CGC_GATE__ENT_MASK
631 			| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
632 			| UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
633 			| UVD_SUVD_CGC_GATE__SITE_MASK
634 			| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
635 			| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
636 			| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
637 			| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
638 			| UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
639 		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
640 
641 		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
642 		data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
643 			| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
644 			| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
645 			| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
646 			| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
647 			| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
648 			| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
649 			| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
650 			| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
651 			| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
652 		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
653 	}
654 }
655 
656 static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
657 		uint8_t sram_sel, int inst_idx, uint8_t indirect)
658 {
659 	uint32_t reg_data = 0;
660 
661 	/* enable sw clock gating control */
662 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
663 		reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
664 	else
665 		reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
666 	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
667 	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
668 	reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
669 		 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
670 		 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
671 		 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
672 		 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
673 		 UVD_CGC_CTRL__SYS_MODE_MASK |
674 		 UVD_CGC_CTRL__UDEC_MODE_MASK |
675 		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
676 		 UVD_CGC_CTRL__REGS_MODE_MASK |
677 		 UVD_CGC_CTRL__RBC_MODE_MASK |
678 		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
679 		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
680 		 UVD_CGC_CTRL__IDCT_MODE_MASK |
681 		 UVD_CGC_CTRL__MPRD_MODE_MASK |
682 		 UVD_CGC_CTRL__MPC_MODE_MASK |
683 		 UVD_CGC_CTRL__LBSI_MODE_MASK |
684 		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
685 		 UVD_CGC_CTRL__WCB_MODE_MASK |
686 		 UVD_CGC_CTRL__VCPU_MODE_MASK |
687 		 UVD_CGC_CTRL__MMSCH_MODE_MASK);
688 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
689 		VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
690 
691 	/* turn off clock gating */
692 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
693 		VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
694 
695 	/* turn on SUVD clock gating */
696 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
697 		VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
698 
699 	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
700 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
701 		VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
702 }
703 
704 /**
705  * vcn_v2_5_enable_clock_gating - enable VCN clock gating
706  *
707  * @adev: amdgpu_device pointer
708  *
709  * Enable clock gating for VCN block
710  */
711 static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
712 {
713 	uint32_t data = 0;
714 	int i;
715 
716 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
717 		if (adev->vcn.harvest_config & (1 << i))
718 			continue;
719 		/* enable UVD CGC */
720 		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
721 		if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
722 			data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
723 		else
724 			data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
725 		data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
726 		data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
727 		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
728 
729 		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
730 		data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
731 			| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
732 			| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
733 			| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
734 			| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
735 			| UVD_CGC_CTRL__SYS_MODE_MASK
736 			| UVD_CGC_CTRL__UDEC_MODE_MASK
737 			| UVD_CGC_CTRL__MPEG2_MODE_MASK
738 			| UVD_CGC_CTRL__REGS_MODE_MASK
739 			| UVD_CGC_CTRL__RBC_MODE_MASK
740 			| UVD_CGC_CTRL__LMI_MC_MODE_MASK
741 			| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
742 			| UVD_CGC_CTRL__IDCT_MODE_MASK
743 			| UVD_CGC_CTRL__MPRD_MODE_MASK
744 			| UVD_CGC_CTRL__MPC_MODE_MASK
745 			| UVD_CGC_CTRL__LBSI_MODE_MASK
746 			| UVD_CGC_CTRL__LRBBM_MODE_MASK
747 			| UVD_CGC_CTRL__WCB_MODE_MASK
748 			| UVD_CGC_CTRL__VCPU_MODE_MASK);
749 		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
750 
751 		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
752 		data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
753 			| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
754 			| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
755 			| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
756 			| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
757 			| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
758 			| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
759 			| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
760 			| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
761 			| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
762 		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
763 	}
764 }
765 
766 static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
767 {
768 	volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
769 	struct amdgpu_ring *ring;
770 	uint32_t rb_bufsz, tmp;
771 
772 	/* disable register anti-hang mechanism */
773 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
774 		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
775 	/* enable dynamic power gating mode */
776 	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
777 	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
778 	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
779 	WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);
780 
781 	if (indirect)
782 		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
783 
784 	/* enable clock gating */
785 	vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
786 
787 	/* enable VCPU clock */
788 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
789 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
790 	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
791 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
792 		VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
793 
794 	/* disable master interupt */
795 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
796 		VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
797 
798 	/* setup mmUVD_LMI_CTRL */
799 	tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
800 		UVD_LMI_CTRL__REQ_MODE_MASK |
801 		UVD_LMI_CTRL__CRC_RESET_MASK |
802 		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
803 		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
804 		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
805 		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
806 		0x00100000L);
807 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
808 		VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
809 
810 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
811 		VCN, 0, mmUVD_MPC_CNTL),
812 		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
813 
814 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
815 		VCN, 0, mmUVD_MPC_SET_MUXA0),
816 		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
817 		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
818 		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
819 		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
820 
821 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
822 		VCN, 0, mmUVD_MPC_SET_MUXB0),
823 		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
824 		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
825 		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
826 		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
827 
828 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
829 		VCN, 0, mmUVD_MPC_SET_MUX),
830 		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
831 		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
832 		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
833 
834 	vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
835 
836 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
837 		VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
838 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
839 		VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
840 
841 	/* enable LMI MC and UMC channels */
842 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
843 		VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
844 
845 	/* unblock VCPU register access */
846 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
847 		VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
848 
849 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
850 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
851 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
852 		VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
853 
854 	/* enable master interrupt */
855 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
856 		VCN, 0, mmUVD_MASTINT_EN),
857 		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
858 
859 	if (indirect)
860 		psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
861 				    (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
862 					       (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
863 
864 	ring = &adev->vcn.inst[inst_idx].ring_dec;
865 	/* force RBC into idle state */
866 	rb_bufsz = order_base_2(ring->ring_size);
867 	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
868 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
869 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
870 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
871 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
872 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
873 
874 	/* Stall DPG before WPTR/RPTR reset */
875 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
876 		UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
877 		~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
878 	fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
879 
880 	/* set the write pointer delay */
881 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
882 
883 	/* set the wb address */
884 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
885 		(upper_32_bits(ring->gpu_addr) >> 2));
886 
887 	/* program the RB_BASE for ring buffer */
888 	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
889 		lower_32_bits(ring->gpu_addr));
890 	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
891 		upper_32_bits(ring->gpu_addr));
892 
893 	/* Initialize the ring buffer's read and write pointers */
894 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);
895 
896 	WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);
897 
898 	ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
899 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
900 		lower_32_bits(ring->wptr));
901 
902 	fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
903 	/* Unstall DPG */
904 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
905 		0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
906 
907 	return 0;
908 }
909 
910 static int vcn_v2_5_start(struct amdgpu_device *adev)
911 {
912 	struct amdgpu_ring *ring;
913 	uint32_t rb_bufsz, tmp;
914 	int i, j, k, r;
915 
916 	if (adev->pm.dpm_enabled)
917 		amdgpu_dpm_enable_uvd(adev, true);
918 
919 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
920 		if (adev->vcn.harvest_config & (1 << i))
921 			continue;
922 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
923 			r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
924 			continue;
925 		}
926 
927 		/* disable register anti-hang mechanism */
928 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
929 			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
930 
931 		/* set uvd status busy */
932 		tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
933 		WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
934 	}
935 
936 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
937 		return 0;
938 
939 	/*SW clock gating */
940 	vcn_v2_5_disable_clock_gating(adev);
941 
942 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
943 		if (adev->vcn.harvest_config & (1 << i))
944 			continue;
945 		/* enable VCPU clock */
946 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
947 			UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
948 
949 		/* disable master interrupt */
950 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
951 			~UVD_MASTINT_EN__VCPU_EN_MASK);
952 
953 		/* setup mmUVD_LMI_CTRL */
954 		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
955 		tmp &= ~0xff;
956 		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
957 			UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK	|
958 			UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
959 			UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
960 			UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
961 
962 		/* setup mmUVD_MPC_CNTL */
963 		tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
964 		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
965 		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
966 		WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
967 
968 		/* setup UVD_MPC_SET_MUXA0 */
969 		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
970 			((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
971 			(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
972 			(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
973 			(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
974 
975 		/* setup UVD_MPC_SET_MUXB0 */
976 		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
977 			((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
978 			(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
979 			(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
980 			(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
981 
982 		/* setup mmUVD_MPC_SET_MUX */
983 		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
984 			((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
985 			(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
986 			(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
987 	}
988 
989 	vcn_v2_5_mc_resume(adev);
990 
991 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
992 		volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
993 		if (adev->vcn.harvest_config & (1 << i))
994 			continue;
995 		/* VCN global tiling registers */
996 		WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
997 			adev->gfx.config.gb_addr_config);
998 		WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
999 			adev->gfx.config.gb_addr_config);
1000 
1001 		/* enable LMI MC and UMC channels */
1002 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
1003 			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1004 
1005 		/* unblock VCPU register access */
1006 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
1007 			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1008 
1009 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1010 			~UVD_VCPU_CNTL__BLK_RST_MASK);
1011 
1012 		for (k = 0; k < 10; ++k) {
1013 			uint32_t status;
1014 
1015 			for (j = 0; j < 100; ++j) {
1016 				status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
1017 				if (status & 2)
1018 					break;
1019 				if (amdgpu_emu_mode == 1)
1020 					msleep(500);
1021 				else
1022 					mdelay(10);
1023 			}
1024 			r = 0;
1025 			if (status & 2)
1026 				break;
1027 
1028 			DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
1029 			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
1030 				UVD_VCPU_CNTL__BLK_RST_MASK,
1031 				~UVD_VCPU_CNTL__BLK_RST_MASK);
1032 			mdelay(10);
1033 			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1034 				~UVD_VCPU_CNTL__BLK_RST_MASK);
1035 
1036 			mdelay(10);
1037 			r = -1;
1038 		}
1039 
1040 		if (r) {
1041 			DRM_ERROR("VCN decode not responding, giving up!!!\n");
1042 			return r;
1043 		}
1044 
1045 		/* enable master interrupt */
1046 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
1047 			UVD_MASTINT_EN__VCPU_EN_MASK,
1048 			~UVD_MASTINT_EN__VCPU_EN_MASK);
1049 
1050 		/* clear the busy bit of VCN_STATUS */
1051 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
1052 			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1053 
1054 		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
1055 
1056 		ring = &adev->vcn.inst[i].ring_dec;
1057 		/* force RBC into idle state */
1058 		rb_bufsz = order_base_2(ring->ring_size);
1059 		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1060 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1061 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1062 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1063 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1064 		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
1065 
1066 		fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
1067 		/* program the RB_BASE for ring buffer */
1068 		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
1069 			lower_32_bits(ring->gpu_addr));
1070 		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
1071 			upper_32_bits(ring->gpu_addr));
1072 
1073 		/* Initialize the ring buffer's read and write pointers */
1074 		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
1075 
1076 		ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
1077 		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
1078 				lower_32_bits(ring->wptr));
1079 		fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
1080 
1081 		fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
1082 		ring = &adev->vcn.inst[i].ring_enc[0];
1083 		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1084 		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1085 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
1086 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1087 		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
1088 		fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
1089 
1090 		fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
1091 		ring = &adev->vcn.inst[i].ring_enc[1];
1092 		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1093 		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1094 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1095 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1096 		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
1097 		fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
1098 	}
1099 
1100 	return 0;
1101 }
1102 
1103 static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
1104 				struct amdgpu_mm_table *table)
1105 {
1106 	uint32_t data = 0, loop = 0, size = 0;
1107 	uint64_t addr = table->gpu_addr;
1108 	struct mmsch_v1_1_init_header *header = NULL;
1109 
1110 	header = (struct mmsch_v1_1_init_header *)table->cpu_addr;
1111 	size = header->total_size;
1112 
1113 	/*
1114 	 * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of
1115 	 *  memory descriptor location
1116 	 */
1117 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
1118 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
1119 
1120 	/* 2, update vmid of descriptor */
1121 	data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
1122 	data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
1123 	/* use domain0 for MM scheduler */
1124 	data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
1125 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, data);
1126 
1127 	/* 3, notify mmsch about the size of this descriptor */
1128 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
1129 
1130 	/* 4, set resp to zero */
1131 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
1132 
1133 	/*
1134 	 * 5, kick off the initialization and wait until
1135 	 * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
1136 	 */
1137 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
1138 
1139 	data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
1140 	loop = 10;
1141 	while ((data & 0x10000002) != 0x10000002) {
1142 		udelay(100);
1143 		data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
1144 		loop--;
1145 		if (!loop)
1146 			break;
1147 	}
1148 
1149 	if (!loop) {
1150 		dev_err(adev->dev,
1151 			"failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n",
1152 			data);
1153 		return -EBUSY;
1154 	}
1155 
1156 	return 0;
1157 }
1158 
1159 static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
1160 {
1161 	struct amdgpu_ring *ring;
1162 	uint32_t offset, size, tmp, i, rb_bufsz;
1163 	uint32_t table_size = 0;
1164 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
1165 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
1166 	struct mmsch_v1_0_cmd_end end = { { 0 } };
1167 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
1168 	struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;
1169 
1170 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
1171 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
1172 	end.cmd_header.command_type = MMSCH_COMMAND__END;
1173 
1174 	header->version = MMSCH_VERSION;
1175 	header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2;
1176 	init_table += header->total_size;
1177 
1178 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1179 		header->eng[i].table_offset = header->total_size;
1180 		header->eng[i].init_status = 0;
1181 		header->eng[i].table_size = 0;
1182 
1183 		table_size = 0;
1184 
1185 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
1186 			SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS),
1187 			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
1188 
1189 		size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
1190 		/* mc resume*/
1191 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1192 			MMSCH_V1_0_INSERT_DIRECT_WT(
1193 				SOC15_REG_OFFSET(VCN, i,
1194 					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1195 				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
1196 			MMSCH_V1_0_INSERT_DIRECT_WT(
1197 				SOC15_REG_OFFSET(VCN, i,
1198 					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1199 				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
1200 			offset = 0;
1201 			MMSCH_V1_0_INSERT_DIRECT_WT(
1202 				SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
1203 		} else {
1204 			MMSCH_V1_0_INSERT_DIRECT_WT(
1205 				SOC15_REG_OFFSET(VCN, i,
1206 					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1207 				lower_32_bits(adev->vcn.inst[i].gpu_addr));
1208 			MMSCH_V1_0_INSERT_DIRECT_WT(
1209 				SOC15_REG_OFFSET(VCN, i,
1210 					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1211 				upper_32_bits(adev->vcn.inst[i].gpu_addr));
1212 			offset = size;
1213 			MMSCH_V1_0_INSERT_DIRECT_WT(
1214 				SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0),
1215 				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
1216 		}
1217 
1218 		MMSCH_V1_0_INSERT_DIRECT_WT(
1219 			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE0),
1220 			size);
1221 		MMSCH_V1_0_INSERT_DIRECT_WT(
1222 			SOC15_REG_OFFSET(VCN, i,
1223 				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
1224 			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
1225 		MMSCH_V1_0_INSERT_DIRECT_WT(
1226 			SOC15_REG_OFFSET(VCN, i,
1227 				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
1228 			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
1229 		MMSCH_V1_0_INSERT_DIRECT_WT(
1230 			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET1),
1231 			0);
1232 		MMSCH_V1_0_INSERT_DIRECT_WT(
1233 			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE1),
1234 			AMDGPU_VCN_STACK_SIZE);
1235 		MMSCH_V1_0_INSERT_DIRECT_WT(
1236 			SOC15_REG_OFFSET(VCN, i,
1237 				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
1238 			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
1239 				AMDGPU_VCN_STACK_SIZE));
1240 		MMSCH_V1_0_INSERT_DIRECT_WT(
1241 			SOC15_REG_OFFSET(VCN, i,
1242 				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
1243 			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
1244 				AMDGPU_VCN_STACK_SIZE));
1245 		MMSCH_V1_0_INSERT_DIRECT_WT(
1246 			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET2),
1247 			0);
1248 		MMSCH_V1_0_INSERT_DIRECT_WT(
1249 			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE2),
1250 			AMDGPU_VCN_CONTEXT_SIZE);
1251 
1252 		ring = &adev->vcn.inst[i].ring_enc[0];
1253 		ring->wptr = 0;
1254 
1255 		MMSCH_V1_0_INSERT_DIRECT_WT(
1256 			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_LO),
1257 			lower_32_bits(ring->gpu_addr));
1258 		MMSCH_V1_0_INSERT_DIRECT_WT(
1259 			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_HI),
1260 			upper_32_bits(ring->gpu_addr));
1261 		MMSCH_V1_0_INSERT_DIRECT_WT(
1262 			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_SIZE),
1263 			ring->ring_size / 4);
1264 
1265 		ring = &adev->vcn.inst[i].ring_dec;
1266 		ring->wptr = 0;
1267 		MMSCH_V1_0_INSERT_DIRECT_WT(
1268 			SOC15_REG_OFFSET(VCN, i,
1269 				mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
1270 			lower_32_bits(ring->gpu_addr));
1271 		MMSCH_V1_0_INSERT_DIRECT_WT(
1272 			SOC15_REG_OFFSET(VCN, i,
1273 				mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
1274 			upper_32_bits(ring->gpu_addr));
1275 
1276 		/* force RBC into idle state */
1277 		rb_bufsz = order_base_2(ring->ring_size);
1278 		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1279 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1280 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1281 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1282 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1283 		MMSCH_V1_0_INSERT_DIRECT_WT(
1284 			SOC15_REG_OFFSET(VCN, i, mmUVD_RBC_RB_CNTL), tmp);
1285 
1286 		/* add end packet */
1287 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
1288 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
1289 		init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4;
1290 
1291 		/* refine header */
1292 		header->eng[i].table_size = table_size;
1293 		header->total_size += table_size;
1294 	}
1295 
1296 	return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
1297 }
1298 
1299 static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
1300 {
1301 	uint32_t tmp;
1302 
1303 	/* Wait for power status to be 1 */
1304 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
1305 		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1306 
1307 	/* wait for read ptr to be equal to write ptr */
1308 	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
1309 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
1310 
1311 	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
1312 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
1313 
1314 	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
1315 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
1316 
1317 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
1318 		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1319 
1320 	/* disable dynamic power gating mode */
1321 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
1322 			~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
1323 
1324 	return 0;
1325 }
1326 
1327 static int vcn_v2_5_stop(struct amdgpu_device *adev)
1328 {
1329 	uint32_t tmp;
1330 	int i, r = 0;
1331 
1332 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1333 		if (adev->vcn.harvest_config & (1 << i))
1334 			continue;
1335 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1336 			r = vcn_v2_5_stop_dpg_mode(adev, i);
1337 			continue;
1338 		}
1339 
1340 		/* wait for vcn idle */
1341 		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
1342 		if (r)
1343 			return r;
1344 
1345 		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
1346 			UVD_LMI_STATUS__READ_CLEAN_MASK |
1347 			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
1348 			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
1349 		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
1350 		if (r)
1351 			return r;
1352 
1353 		/* block LMI UMC channel */
1354 		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
1355 		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
1356 		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
1357 
1358 		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
1359 			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
1360 		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
1361 		if (r)
1362 			return r;
1363 
1364 		/* block VCPU register access */
1365 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
1366 			UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
1367 			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1368 
1369 		/* reset VCPU */
1370 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
1371 			UVD_VCPU_CNTL__BLK_RST_MASK,
1372 			~UVD_VCPU_CNTL__BLK_RST_MASK);
1373 
1374 		/* disable VCPU clock */
1375 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1376 			~(UVD_VCPU_CNTL__CLK_EN_MASK));
1377 
1378 		/* clear status */
1379 		WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
1380 
1381 		vcn_v2_5_enable_clock_gating(adev);
1382 
1383 		/* enable register anti-hang mechanism */
1384 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
1385 			UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
1386 			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1387 	}
1388 
1389 	if (adev->pm.dpm_enabled)
1390 		amdgpu_dpm_enable_uvd(adev, false);
1391 
1392 	return 0;
1393 }
1394 
1395 static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
1396 				int inst_idx, struct dpg_pause_state *new_state)
1397 {
1398 	struct amdgpu_ring *ring;
1399 	uint32_t reg_data = 0;
1400 	int ret_code = 0;
1401 
1402 	/* pause/unpause if state is changed */
1403 	if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
1404 		DRM_DEBUG("dpg pause state changed %d -> %d",
1405 			adev->vcn.inst[inst_idx].pause_state.fw_based,	new_state->fw_based);
1406 		reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
1407 			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1408 
1409 		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
1410 			ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
1411 				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1412 
1413 			if (!ret_code) {
1414 				volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
1415 
1416 				/* pause DPG */
1417 				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1418 				WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
1419 
1420 				/* wait for ACK */
1421 				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
1422 					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
1423 					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1424 
1425 				/* Stall DPG before WPTR/RPTR reset */
1426 				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1427 					   UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
1428 					   ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1429 
1430 				/* Restore */
1431 				fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
1432 				ring = &adev->vcn.inst[inst_idx].ring_enc[0];
1433 				ring->wptr = 0;
1434 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
1435 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1436 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
1437 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1438 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1439 				fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
1440 
1441 				fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
1442 				ring = &adev->vcn.inst[inst_idx].ring_enc[1];
1443 				ring->wptr = 0;
1444 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1445 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1446 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
1447 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1448 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1449 				fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
1450 
1451 				/* Unstall DPG */
1452 				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1453 					   0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1454 
1455 				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
1456 					   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1457 			}
1458 		} else {
1459 			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1460 			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
1461 			SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
1462 				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1463 		}
1464 		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
1465 	}
1466 
1467 	return 0;
1468 }
1469 
1470 /**
1471  * vcn_v2_5_dec_ring_get_rptr - get read pointer
1472  *
1473  * @ring: amdgpu_ring pointer
1474  *
1475  * Returns the current hardware read pointer
1476  */
1477 static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
1478 {
1479 	struct amdgpu_device *adev = ring->adev;
1480 
1481 	return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
1482 }
1483 
1484 /**
1485  * vcn_v2_5_dec_ring_get_wptr - get write pointer
1486  *
1487  * @ring: amdgpu_ring pointer
1488  *
1489  * Returns the current hardware write pointer
1490  */
1491 static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
1492 {
1493 	struct amdgpu_device *adev = ring->adev;
1494 
1495 	if (ring->use_doorbell)
1496 		return adev->wb.wb[ring->wptr_offs];
1497 	else
1498 		return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
1499 }
1500 
1501 /**
1502  * vcn_v2_5_dec_ring_set_wptr - set write pointer
1503  *
1504  * @ring: amdgpu_ring pointer
1505  *
1506  * Commits the write pointer to the hardware
1507  */
1508 static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
1509 {
1510 	struct amdgpu_device *adev = ring->adev;
1511 
1512 	if (ring->use_doorbell) {
1513 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
1514 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1515 	} else {
1516 		WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
1517 	}
1518 }
1519 
1520 static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
1521 	.type = AMDGPU_RING_TYPE_VCN_DEC,
1522 	.align_mask = 0xf,
1523 	.vmhub = AMDGPU_MMHUB_1,
1524 	.get_rptr = vcn_v2_5_dec_ring_get_rptr,
1525 	.get_wptr = vcn_v2_5_dec_ring_get_wptr,
1526 	.set_wptr = vcn_v2_5_dec_ring_set_wptr,
1527 	.emit_frame_size =
1528 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
1529 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
1530 		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
1531 		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
1532 		6,
1533 	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
1534 	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
1535 	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
1536 	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
1537 	.test_ring = vcn_v2_0_dec_ring_test_ring,
1538 	.test_ib = amdgpu_vcn_dec_ring_test_ib,
1539 	.insert_nop = vcn_v2_0_dec_ring_insert_nop,
1540 	.insert_start = vcn_v2_0_dec_ring_insert_start,
1541 	.insert_end = vcn_v2_0_dec_ring_insert_end,
1542 	.pad_ib = amdgpu_ring_generic_pad_ib,
1543 	.begin_use = amdgpu_vcn_ring_begin_use,
1544 	.end_use = amdgpu_vcn_ring_end_use,
1545 	.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
1546 	.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
1547 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1548 };
1549 
1550 static const struct amdgpu_ring_funcs vcn_v2_6_dec_ring_vm_funcs = {
1551 	.type = AMDGPU_RING_TYPE_VCN_DEC,
1552 	.align_mask = 0xf,
1553 	.vmhub = AMDGPU_MMHUB_0,
1554 	.get_rptr = vcn_v2_5_dec_ring_get_rptr,
1555 	.get_wptr = vcn_v2_5_dec_ring_get_wptr,
1556 	.set_wptr = vcn_v2_5_dec_ring_set_wptr,
1557 	.emit_frame_size =
1558 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
1559 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
1560 		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
1561 		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
1562 		6,
1563 	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
1564 	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
1565 	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
1566 	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
1567 	.test_ring = vcn_v2_0_dec_ring_test_ring,
1568 	.test_ib = amdgpu_vcn_dec_ring_test_ib,
1569 	.insert_nop = vcn_v2_0_dec_ring_insert_nop,
1570 	.insert_start = vcn_v2_0_dec_ring_insert_start,
1571 	.insert_end = vcn_v2_0_dec_ring_insert_end,
1572 	.pad_ib = amdgpu_ring_generic_pad_ib,
1573 	.begin_use = amdgpu_vcn_ring_begin_use,
1574 	.end_use = amdgpu_vcn_ring_end_use,
1575 	.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
1576 	.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
1577 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1578 };
1579 
1580 /**
1581  * vcn_v2_5_enc_ring_get_rptr - get enc read pointer
1582  *
1583  * @ring: amdgpu_ring pointer
1584  *
1585  * Returns the current hardware enc read pointer
1586  */
1587 static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring)
1588 {
1589 	struct amdgpu_device *adev = ring->adev;
1590 
1591 	if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
1592 		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
1593 	else
1594 		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
1595 }
1596 
1597 /**
1598  * vcn_v2_5_enc_ring_get_wptr - get enc write pointer
1599  *
1600  * @ring: amdgpu_ring pointer
1601  *
1602  * Returns the current hardware enc write pointer
1603  */
1604 static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring)
1605 {
1606 	struct amdgpu_device *adev = ring->adev;
1607 
1608 	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
1609 		if (ring->use_doorbell)
1610 			return adev->wb.wb[ring->wptr_offs];
1611 		else
1612 			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
1613 	} else {
1614 		if (ring->use_doorbell)
1615 			return adev->wb.wb[ring->wptr_offs];
1616 		else
1617 			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
1618 	}
1619 }
1620 
1621 /**
1622  * vcn_v2_5_enc_ring_set_wptr - set enc write pointer
1623  *
1624  * @ring: amdgpu_ring pointer
1625  *
1626  * Commits the enc write pointer to the hardware
1627  */
1628 static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring)
1629 {
1630 	struct amdgpu_device *adev = ring->adev;
1631 
1632 	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
1633 		if (ring->use_doorbell) {
1634 			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
1635 			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1636 		} else {
1637 			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1638 		}
1639 	} else {
1640 		if (ring->use_doorbell) {
1641 			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
1642 			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1643 		} else {
1644 			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1645 		}
1646 	}
1647 }
1648 
1649 static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
1650 	.type = AMDGPU_RING_TYPE_VCN_ENC,
1651 	.align_mask = 0x3f,
1652 	.nop = VCN_ENC_CMD_NO_OP,
1653 	.vmhub = AMDGPU_MMHUB_1,
1654 	.get_rptr = vcn_v2_5_enc_ring_get_rptr,
1655 	.get_wptr = vcn_v2_5_enc_ring_get_wptr,
1656 	.set_wptr = vcn_v2_5_enc_ring_set_wptr,
1657 	.emit_frame_size =
1658 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1659 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1660 		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
1661 		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
1662 		1, /* vcn_v2_0_enc_ring_insert_end */
1663 	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
1664 	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
1665 	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
1666 	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
1667 	.test_ring = amdgpu_vcn_enc_ring_test_ring,
1668 	.test_ib = amdgpu_vcn_enc_ring_test_ib,
1669 	.insert_nop = amdgpu_ring_insert_nop,
1670 	.insert_end = vcn_v2_0_enc_ring_insert_end,
1671 	.pad_ib = amdgpu_ring_generic_pad_ib,
1672 	.begin_use = amdgpu_vcn_ring_begin_use,
1673 	.end_use = amdgpu_vcn_ring_end_use,
1674 	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
1675 	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
1676 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1677 };
1678 
1679 static const struct amdgpu_ring_funcs vcn_v2_6_enc_ring_vm_funcs = {
1680         .type = AMDGPU_RING_TYPE_VCN_ENC,
1681         .align_mask = 0x3f,
1682         .nop = VCN_ENC_CMD_NO_OP,
1683         .vmhub = AMDGPU_MMHUB_0,
1684         .get_rptr = vcn_v2_5_enc_ring_get_rptr,
1685         .get_wptr = vcn_v2_5_enc_ring_get_wptr,
1686         .set_wptr = vcn_v2_5_enc_ring_set_wptr,
1687         .emit_frame_size =
1688                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1689                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1690                 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
1691                 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
1692                 1, /* vcn_v2_0_enc_ring_insert_end */
1693         .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
1694         .emit_ib = vcn_v2_0_enc_ring_emit_ib,
1695         .emit_fence = vcn_v2_0_enc_ring_emit_fence,
1696         .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
1697         .test_ring = amdgpu_vcn_enc_ring_test_ring,
1698         .test_ib = amdgpu_vcn_enc_ring_test_ib,
1699         .insert_nop = amdgpu_ring_insert_nop,
1700         .insert_end = vcn_v2_0_enc_ring_insert_end,
1701         .pad_ib = amdgpu_ring_generic_pad_ib,
1702         .begin_use = amdgpu_vcn_ring_begin_use,
1703         .end_use = amdgpu_vcn_ring_end_use,
1704         .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
1705         .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
1706         .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1707 };
1708 
1709 static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
1710 {
1711 	int i;
1712 
1713 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1714 		if (adev->vcn.harvest_config & (1 << i))
1715 			continue;
1716 		if (adev->asic_type == CHIP_ARCTURUS)
1717 			adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
1718 		else /* CHIP_ALDEBARAN */
1719 			adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_6_dec_ring_vm_funcs;
1720 		adev->vcn.inst[i].ring_dec.me = i;
1721 		DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
1722 	}
1723 }
1724 
1725 static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
1726 {
1727 	int i, j;
1728 
1729 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
1730 		if (adev->vcn.harvest_config & (1 << j))
1731 			continue;
1732 		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
1733 			if (adev->asic_type == CHIP_ARCTURUS)
1734 				adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
1735 			else /* CHIP_ALDEBARAN */
1736 				adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_6_enc_ring_vm_funcs;
1737 			adev->vcn.inst[j].ring_enc[i].me = j;
1738 		}
1739 		DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j);
1740 	}
1741 }
1742 
1743 static bool vcn_v2_5_is_idle(void *handle)
1744 {
1745 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1746 	int i, ret = 1;
1747 
1748 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1749 		if (adev->vcn.harvest_config & (1 << i))
1750 			continue;
1751 		ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
1752 	}
1753 
1754 	return ret;
1755 }
1756 
1757 static int vcn_v2_5_wait_for_idle(void *handle)
1758 {
1759 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1760 	int i, ret = 0;
1761 
1762 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1763 		if (adev->vcn.harvest_config & (1 << i))
1764 			continue;
1765 		ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
1766 			UVD_STATUS__IDLE);
1767 		if (ret)
1768 			return ret;
1769 	}
1770 
1771 	return ret;
1772 }
1773 
1774 static int vcn_v2_5_set_clockgating_state(void *handle,
1775 					  enum amd_clockgating_state state)
1776 {
1777 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1778 	bool enable = (state == AMD_CG_STATE_GATE);
1779 
1780 	if (amdgpu_sriov_vf(adev))
1781 		return 0;
1782 
1783 	if (enable) {
1784 		if (!vcn_v2_5_is_idle(handle))
1785 			return -EBUSY;
1786 		vcn_v2_5_enable_clock_gating(adev);
1787 	} else {
1788 		vcn_v2_5_disable_clock_gating(adev);
1789 	}
1790 
1791 	return 0;
1792 }
1793 
1794 static int vcn_v2_5_set_powergating_state(void *handle,
1795 					  enum amd_powergating_state state)
1796 {
1797 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1798 	int ret;
1799 
1800 	if (amdgpu_sriov_vf(adev))
1801 		return 0;
1802 
1803 	if(state == adev->vcn.cur_state)
1804 		return 0;
1805 
1806 	if (state == AMD_PG_STATE_GATE)
1807 		ret = vcn_v2_5_stop(adev);
1808 	else
1809 		ret = vcn_v2_5_start(adev);
1810 
1811 	if(!ret)
1812 		adev->vcn.cur_state = state;
1813 
1814 	return ret;
1815 }
1816 
1817 static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev,
1818 					struct amdgpu_irq_src *source,
1819 					unsigned type,
1820 					enum amdgpu_interrupt_state state)
1821 {
1822 	return 0;
1823 }
1824 
1825 static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
1826 				      struct amdgpu_irq_src *source,
1827 				      struct amdgpu_iv_entry *entry)
1828 {
1829 	uint32_t ip_instance;
1830 
1831 	switch (entry->client_id) {
1832 	case SOC15_IH_CLIENTID_VCN:
1833 		ip_instance = 0;
1834 		break;
1835 	case SOC15_IH_CLIENTID_VCN1:
1836 		ip_instance = 1;
1837 		break;
1838 	default:
1839 		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
1840 		return 0;
1841 	}
1842 
1843 	DRM_DEBUG("IH: VCN TRAP\n");
1844 
1845 	switch (entry->src_id) {
1846 	case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
1847 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
1848 		break;
1849 	case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
1850 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
1851 		break;
1852 	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
1853 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
1854 		break;
1855 	default:
1856 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1857 			  entry->src_id, entry->src_data[0]);
1858 		break;
1859 	}
1860 
1861 	return 0;
1862 }
1863 
1864 static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
1865 	.set = vcn_v2_5_set_interrupt_state,
1866 	.process = vcn_v2_5_process_interrupt,
1867 };
1868 
1869 static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
1870 {
1871 	int i;
1872 
1873 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1874 		if (adev->vcn.harvest_config & (1 << i))
1875 			continue;
1876 		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
1877 		adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
1878 	}
1879 }
1880 
1881 static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
1882 	.name = "vcn_v2_5",
1883 	.early_init = vcn_v2_5_early_init,
1884 	.late_init = NULL,
1885 	.sw_init = vcn_v2_5_sw_init,
1886 	.sw_fini = vcn_v2_5_sw_fini,
1887 	.hw_init = vcn_v2_5_hw_init,
1888 	.hw_fini = vcn_v2_5_hw_fini,
1889 	.suspend = vcn_v2_5_suspend,
1890 	.resume = vcn_v2_5_resume,
1891 	.is_idle = vcn_v2_5_is_idle,
1892 	.wait_for_idle = vcn_v2_5_wait_for_idle,
1893 	.check_soft_reset = NULL,
1894 	.pre_soft_reset = NULL,
1895 	.soft_reset = NULL,
1896 	.post_soft_reset = NULL,
1897 	.set_clockgating_state = vcn_v2_5_set_clockgating_state,
1898 	.set_powergating_state = vcn_v2_5_set_powergating_state,
1899 };
1900 
1901 static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
1902         .name = "vcn_v2_6",
1903         .early_init = vcn_v2_5_early_init,
1904         .late_init = NULL,
1905         .sw_init = vcn_v2_5_sw_init,
1906         .sw_fini = vcn_v2_5_sw_fini,
1907         .hw_init = vcn_v2_5_hw_init,
1908         .hw_fini = vcn_v2_5_hw_fini,
1909         .suspend = vcn_v2_5_suspend,
1910         .resume = vcn_v2_5_resume,
1911         .is_idle = vcn_v2_5_is_idle,
1912         .wait_for_idle = vcn_v2_5_wait_for_idle,
1913         .check_soft_reset = NULL,
1914         .pre_soft_reset = NULL,
1915         .soft_reset = NULL,
1916         .post_soft_reset = NULL,
1917         .set_clockgating_state = vcn_v2_5_set_clockgating_state,
1918         .set_powergating_state = vcn_v2_5_set_powergating_state,
1919 };
1920 
1921 const struct amdgpu_ip_block_version vcn_v2_5_ip_block =
1922 {
1923 		.type = AMD_IP_BLOCK_TYPE_VCN,
1924 		.major = 2,
1925 		.minor = 5,
1926 		.rev = 0,
1927 		.funcs = &vcn_v2_5_ip_funcs,
1928 };
1929 
1930 const struct amdgpu_ip_block_version vcn_v2_6_ip_block =
1931 {
1932 		.type = AMD_IP_BLOCK_TYPE_VCN,
1933 		.major = 2,
1934 		.minor = 6,
1935 		.rev = 0,
1936 		.funcs = &vcn_v2_6_ip_funcs,
1937 };
1938