xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c (revision 442d61af)
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/firmware.h>
25 #include "amdgpu.h"
26 #include "amdgpu_vcn.h"
27 #include "amdgpu_pm.h"
28 #include "amdgpu_cs.h"
29 #include "soc15.h"
30 #include "soc15d.h"
31 #include "soc15_hw_ip.h"
32 #include "vcn_v2_0.h"
33 #include "mmsch_v4_0.h"
34 #include "vcn_v4_0.h"
35 
36 #include "vcn/vcn_4_0_0_offset.h"
37 #include "vcn/vcn_4_0_0_sh_mask.h"
38 #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
39 
40 #include <drm/drm_drv.h>
41 
/*
 * Map the legacy mm-prefixed register names onto the reg-prefixed VCN 4.0
 * definitions.
 * NOTE(review): presumably required by shared DPG helper macros that still
 * spell these registers with the mm prefix - confirm.
 */
#define mmUVD_DPG_LMA_CTL							regUVD_DPG_LMA_CTL
#define mmUVD_DPG_LMA_CTL_BASE_IDX						regUVD_DPG_LMA_CTL_BASE_IDX
#define mmUVD_DPG_LMA_DATA							regUVD_DPG_LMA_DATA
#define mmUVD_DPG_LMA_DATA_BASE_IDX						regUVD_DPG_LMA_DATA_BASE_IDX

/* NOTE(review): address constants; not referenced in the visible part of this file. */
#define VCN_VID_SOC_ADDRESS_2_0							0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0						0x48300

/* Harvest configuration value assigned in early init when running as an SR-IOV VF. */
#define VCN_HARVEST_MMSCH								0

/* NOTE(review): presumably decode message type identifiers - confirm at the use site. */
#define RDECODE_MSG_CREATE							0x00000000
#define RDECODE_MESSAGE_CREATE							0x00000001
54 
55 static int amdgpu_ih_clientid_vcns[] = {
56 	SOC15_IH_CLIENTID_VCN,
57 	SOC15_IH_CLIENTID_VCN1
58 };
59 
/* Forward declarations of static functions that are referenced before their definitions. */
static int vcn_v4_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_set_powergating_state(void *handle,
        enum amd_powergating_state state);
static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,
        int inst_idx, struct dpg_pause_state *new_state);
static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev);
69 
70 /**
71  * vcn_v4_0_early_init - set function pointers and load microcode
72  *
73  * @handle: amdgpu_device pointer
74  *
75  * Set ring and irq function pointers
76  * Load microcode from filesystem
77  */
78 static int vcn_v4_0_early_init(void *handle)
79 {
80 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
81 
82 	if (amdgpu_sriov_vf(adev))
83 		adev->vcn.harvest_config = VCN_HARVEST_MMSCH;
84 
85 	/* re-use enc ring as unified ring */
86 	adev->vcn.num_enc_rings = 1;
87 
88 	vcn_v4_0_set_unified_ring_funcs(adev);
89 	vcn_v4_0_set_irq_funcs(adev);
90 	vcn_v4_0_set_ras_funcs(adev);
91 
92 	return amdgpu_vcn_early_init(adev);
93 }
94 
95 /**
96  * vcn_v4_0_sw_init - sw init for VCN block
97  *
98  * @handle: amdgpu_device pointer
99  *
100  * Load firmware and sw initialization
101  */
102 static int vcn_v4_0_sw_init(void *handle)
103 {
104 	struct amdgpu_ring *ring;
105 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
106 	int i, r;
107 
108 	r = amdgpu_vcn_sw_init(adev);
109 	if (r)
110 		return r;
111 
112 	amdgpu_vcn_setup_ucode(adev);
113 
114 	r = amdgpu_vcn_resume(adev);
115 	if (r)
116 		return r;
117 
118 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
119 		volatile struct amdgpu_vcn4_fw_shared *fw_shared;
120 
121 		if (adev->vcn.harvest_config & (1 << i))
122 			continue;
123 
124 		atomic_set(&adev->vcn.inst[i].sched_score, 0);
125 
126 		/* VCN UNIFIED TRAP */
127 		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
128 				VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
129 		if (r)
130 			return r;
131 
132 		/* VCN POISON TRAP */
133 		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
134 				VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
135 		if (r)
136 			return r;
137 
138 		ring = &adev->vcn.inst[i].ring_enc[0];
139 		ring->use_doorbell = true;
140 		if (amdgpu_sriov_vf(adev))
141 			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1;
142 		else
143 			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
144 
145 		sprintf(ring->name, "vcn_unified_%d", i);
146 
147 		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
148 						AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
149 		if (r)
150 			return r;
151 
152 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
153 		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
154 		fw_shared->sq.is_enabled = 1;
155 
156 		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
157 		fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
158 			AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
159 
160 		if (amdgpu_sriov_vf(adev))
161 			fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
162 
163 		if (amdgpu_vcnfw_log)
164 			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
165 	}
166 
167 	if (amdgpu_sriov_vf(adev)) {
168 		r = amdgpu_virt_alloc_mm_table(adev);
169 		if (r)
170 			return r;
171 	}
172 
173 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
174 		adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
175 
176 	return 0;
177 }
178 
179 /**
180  * vcn_v4_0_sw_fini - sw fini for VCN block
181  *
182  * @handle: amdgpu_device pointer
183  *
184  * VCN suspend and free up sw allocation
185  */
186 static int vcn_v4_0_sw_fini(void *handle)
187 {
188 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
189 	int i, r, idx;
190 
191 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
192 		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
193 			volatile struct amdgpu_vcn4_fw_shared *fw_shared;
194 
195 			if (adev->vcn.harvest_config & (1 << i))
196 				continue;
197 
198 			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
199 			fw_shared->present_flag_0 = 0;
200 			fw_shared->sq.is_enabled = 0;
201 		}
202 
203 		drm_dev_exit(idx);
204 	}
205 
206 	if (amdgpu_sriov_vf(adev))
207 		amdgpu_virt_free_mm_table(adev);
208 
209 	r = amdgpu_vcn_suspend(adev);
210 	if (r)
211 		return r;
212 
213 	r = amdgpu_vcn_sw_fini(adev);
214 
215 	return r;
216 }
217 
218 /**
219  * vcn_v4_0_hw_init - start and test VCN block
220  *
221  * @handle: amdgpu_device pointer
222  *
223  * Initialize the hardware, boot up the VCPU and do some testing
224  */
225 static int vcn_v4_0_hw_init(void *handle)
226 {
227 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
228 	struct amdgpu_ring *ring;
229 	int i, r;
230 
231 	if (amdgpu_sriov_vf(adev)) {
232 		r = vcn_v4_0_start_sriov(adev);
233 		if (r)
234 			goto done;
235 
236 		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
237 			if (adev->vcn.harvest_config & (1 << i))
238 				continue;
239 
240 			ring = &adev->vcn.inst[i].ring_enc[0];
241 			if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
242 				ring->sched.ready = false;
243 				ring->no_scheduler = true;
244 				dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
245 			} else {
246 				ring->wptr = 0;
247 				ring->wptr_old = 0;
248 				vcn_v4_0_unified_ring_set_wptr(ring);
249 				ring->sched.ready = true;
250 			}
251 		}
252 	} else {
253 		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
254 			if (adev->vcn.harvest_config & (1 << i))
255 				continue;
256 
257 			ring = &adev->vcn.inst[i].ring_enc[0];
258 
259 			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
260 					((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i);
261 
262 			r = amdgpu_ring_test_helper(ring);
263 			if (r)
264 				goto done;
265 
266 		}
267 	}
268 
269 done:
270 	if (!r)
271 		DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
272 			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
273 
274 	return r;
275 }
276 
277 /**
278  * vcn_v4_0_hw_fini - stop the hardware block
279  *
280  * @handle: amdgpu_device pointer
281  *
282  * Stop the VCN block, mark ring as not ready any more
283  */
284 static int vcn_v4_0_hw_fini(void *handle)
285 {
286 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
287 	int i;
288 
289 	cancel_delayed_work_sync(&adev->vcn.idle_work);
290 
291 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
292 		if (adev->vcn.harvest_config & (1 << i))
293 			continue;
294 		if (!amdgpu_sriov_vf(adev)) {
295 			if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
296                         (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
297                                 RREG32_SOC15(VCN, i, regUVD_STATUS))) {
298                         vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
299 			}
300 		}
301 
302 		amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0);
303 	}
304 
305 	return 0;
306 }
307 
/**
 * vcn_v4_0_suspend - suspend VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Stops the hardware via vcn_v4_0_hw_fini() and, if that succeeded, runs the
 * common VCN suspend.
 *
 * Return: 0 on success, otherwise the error of the step that failed.
 */
static int vcn_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r = vcn_v4_0_hw_fini(adev);

	if (r)
		return r;

	return amdgpu_vcn_suspend(adev);
}
328 
/**
 * vcn_v4_0_resume - resume VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Runs the common VCN resume and, if that succeeded, re-initializes the
 * hardware through vcn_v4_0_hw_init().
 *
 * Return: 0 on success, otherwise the error of the step that failed.
 */
static int vcn_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r = amdgpu_vcn_resume(adev);

	if (r)
		return r;

	return vcn_v4_0_hw_init(adev);
}
349 
350 /**
351  * vcn_v4_0_mc_resume - memory controller programming
352  *
353  * @adev: amdgpu_device pointer
354  * @inst: instance number
355  *
356  * Let the VCN memory controller know it's offsets
357  */
358 static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst)
359 {
360 	uint32_t offset, size;
361 	const struct common_firmware_header *hdr;
362 
363 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
364 	size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
365 
366 	/* cache window 0: fw */
367 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
368 		WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
369 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
370 		WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
371 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
372 		WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0);
373 		offset = 0;
374 	} else {
375 		WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
376 			lower_32_bits(adev->vcn.inst[inst].gpu_addr));
377 		WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
378 			upper_32_bits(adev->vcn.inst[inst].gpu_addr));
379 		offset = size;
380                 WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
381 	}
382 	WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size);
383 
384 	/* cache window 1: stack */
385 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
386 		lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
387 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
388 		upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
389 	WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0);
390 	WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
391 
392 	/* cache window 2: context */
393 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
394 		lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
395 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
396 		upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
397 	WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0);
398 	WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
399 
400 	/* non-cache window */
401 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
402 		lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
403 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
404 		upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
405 	WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
406 	WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0,
407 		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
408 }
409 
410 /**
411  * vcn_v4_0_mc_resume_dpg_mode - memory controller programming for dpg mode
412  *
413  * @adev: amdgpu_device pointer
414  * @inst_idx: instance number index
415  * @indirect: indirectly write sram
416  *
417  * Let the VCN memory controller know it's offsets with dpg mode
418  */
419 static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
420 {
421 	uint32_t offset, size;
422 	const struct common_firmware_header *hdr;
423 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
424 	size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
425 
426 	/* cache window 0: fw */
427 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
428 		if (!indirect) {
429 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
430 				VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
431 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
432 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
433 				VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
434 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
435 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
436 				VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
437 		} else {
438 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
439 				VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
440 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
441 				VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
442 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
443 				VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
444 		}
445 		offset = 0;
446 	} else {
447 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
448 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
449 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
450 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
451 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
452 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
453 		offset = size;
454 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
455 			VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
456 			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
457 	}
458 
459 	if (!indirect)
460 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
461 			VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
462 	else
463 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
464 			VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
465 
466 	/* cache window 1: stack */
467 	if (!indirect) {
468 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
469 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
470 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
471 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
472 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
473 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
474 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
475 			VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
476 	} else {
477 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
478 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
479 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
480 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
481 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
482 			VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
483 	}
484 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
485 			VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
486 
487 	/* cache window 2: context */
488 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
489 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
490 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
491 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
492 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
493 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
494 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
495 			VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
496 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
497 			VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
498 
499 	/* non-cache window */
500 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
501 			VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
502 			lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
503 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
504 			VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
505 			upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
506 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
507 			VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
508 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
509 			VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0),
510 			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
511 
512 	/* VCN global tiling registers */
513 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
514 		VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
515 }
516 
517 /**
518  * vcn_v4_0_disable_static_power_gating - disable VCN static power gating
519  *
520  * @adev: amdgpu_device pointer
521  * @inst: instance number
522  *
523  * Disable static power gating for VCN block
524  */
525 static void vcn_v4_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
526 {
527 	uint32_t data = 0;
528 
529 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
530 		data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
531 			| 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
532 			| 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
533 			| 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
534 			| 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
535 			| 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
536 			| 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
537 			| 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
538 			| 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
539 			| 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
540 			| 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
541 			| 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
542 			| 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
543 			| 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
544 
545 		WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
546 		SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS,
547 			UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
548 	} else {
549 		uint32_t value;
550 
551 		value = (inst) ? 0x2200800 : 0;
552 		data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
553 			| 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
554 			| 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
555 			| 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
556 			| 1 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
557 			| 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
558 			| 1 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
559 			| 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
560 			| 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
561 			| 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
562 			| 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
563 			| 1 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
564 			| 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
565 			| 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
566 
567                 WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
568                 SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS, value,  0x3F3FFFFF);
569         }
570 
571         data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
572         data &= ~0x103;
573         if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
574                 data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
575                         UVD_POWER_STATUS__UVD_PG_EN_MASK;
576 
577         WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
578 
579         return;
580 }
581 
582 /**
583  * vcn_v4_0_enable_static_power_gating - enable VCN static power gating
584  *
585  * @adev: amdgpu_device pointer
586  * @inst: instance number
587  *
588  * Enable static power gating for VCN block
589  */
590 static void vcn_v4_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
591 {
592 	uint32_t data;
593 
594 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
595 		/* Before power off, this indicator has to be turned on */
596 		data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
597 		data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
598 		data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
599 		WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
600 
601 		data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
602 			| 2 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
603 			| 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
604 			| 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
605 			| 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
606 			| 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
607 			| 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
608 			| 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
609 			| 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
610 			| 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
611 			| 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
612 			| 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
613 			| 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
614 			| 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
615 		WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
616 
617 		data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
618 			| 2 << UVD_PGFSM_STATUS__UVDS_PWR_STATUS__SHIFT
619 			| 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
620 			| 2 << UVD_PGFSM_STATUS__UVDTC_PWR_STATUS__SHIFT
621 			| 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
622 			| 2 << UVD_PGFSM_STATUS__UVDTA_PWR_STATUS__SHIFT
623 			| 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
624 			| 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
625 			| 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
626 			| 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
627 			| 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
628 			| 2 << UVD_PGFSM_STATUS__UVDTB_PWR_STATUS__SHIFT
629 			| 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
630 			| 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
631 		SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
632 	}
633 
634         return;
635 }
636 
637 /**
638  * vcn_v4_0_disable_clock_gating - disable VCN clock gating
639  *
640  * @adev: amdgpu_device pointer
641  * @inst: instance number
642  *
643  * Disable clock gating for VCN block
644  */
645 static void vcn_v4_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
646 {
647 	uint32_t data;
648 
649 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
650 		return;
651 
652 	/* VCN disable CGC */
653 	data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
654 	data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
655 	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
656 	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
657 	WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
658 
659 	data = RREG32_SOC15(VCN, inst, regUVD_CGC_GATE);
660 	data &= ~(UVD_CGC_GATE__SYS_MASK
661 		| UVD_CGC_GATE__UDEC_MASK
662 		| UVD_CGC_GATE__MPEG2_MASK
663 		| UVD_CGC_GATE__REGS_MASK
664 		| UVD_CGC_GATE__RBC_MASK
665 		| UVD_CGC_GATE__LMI_MC_MASK
666 		| UVD_CGC_GATE__LMI_UMC_MASK
667 		| UVD_CGC_GATE__IDCT_MASK
668 		| UVD_CGC_GATE__MPRD_MASK
669 		| UVD_CGC_GATE__MPC_MASK
670 		| UVD_CGC_GATE__LBSI_MASK
671 		| UVD_CGC_GATE__LRBBM_MASK
672 		| UVD_CGC_GATE__UDEC_RE_MASK
673 		| UVD_CGC_GATE__UDEC_CM_MASK
674 		| UVD_CGC_GATE__UDEC_IT_MASK
675 		| UVD_CGC_GATE__UDEC_DB_MASK
676 		| UVD_CGC_GATE__UDEC_MP_MASK
677 		| UVD_CGC_GATE__WCB_MASK
678 		| UVD_CGC_GATE__VCPU_MASK
679 		| UVD_CGC_GATE__MMSCH_MASK);
680 
681 	WREG32_SOC15(VCN, inst, regUVD_CGC_GATE, data);
682 	SOC15_WAIT_ON_RREG(VCN, inst, regUVD_CGC_GATE, 0,  0xFFFFFFFF);
683 
684 	data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
685 	data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
686 		| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
687 		| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
688 		| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
689 		| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
690 		| UVD_CGC_CTRL__SYS_MODE_MASK
691 		| UVD_CGC_CTRL__UDEC_MODE_MASK
692 		| UVD_CGC_CTRL__MPEG2_MODE_MASK
693 		| UVD_CGC_CTRL__REGS_MODE_MASK
694 		| UVD_CGC_CTRL__RBC_MODE_MASK
695 		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
696 		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
697 		| UVD_CGC_CTRL__IDCT_MODE_MASK
698 		| UVD_CGC_CTRL__MPRD_MODE_MASK
699 		| UVD_CGC_CTRL__MPC_MODE_MASK
700 		| UVD_CGC_CTRL__LBSI_MODE_MASK
701 		| UVD_CGC_CTRL__LRBBM_MODE_MASK
702 		| UVD_CGC_CTRL__WCB_MODE_MASK
703 		| UVD_CGC_CTRL__VCPU_MODE_MASK
704 		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
705 	WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
706 
707 	data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE);
708 	data |= (UVD_SUVD_CGC_GATE__SRE_MASK
709 		| UVD_SUVD_CGC_GATE__SIT_MASK
710 		| UVD_SUVD_CGC_GATE__SMP_MASK
711 		| UVD_SUVD_CGC_GATE__SCM_MASK
712 		| UVD_SUVD_CGC_GATE__SDB_MASK
713 		| UVD_SUVD_CGC_GATE__SRE_H264_MASK
714 		| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
715 		| UVD_SUVD_CGC_GATE__SIT_H264_MASK
716 		| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
717 		| UVD_SUVD_CGC_GATE__SCM_H264_MASK
718 		| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
719 		| UVD_SUVD_CGC_GATE__SDB_H264_MASK
720 		| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
721 		| UVD_SUVD_CGC_GATE__SCLR_MASK
722 		| UVD_SUVD_CGC_GATE__UVD_SC_MASK
723 		| UVD_SUVD_CGC_GATE__ENT_MASK
724 		| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
725 		| UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
726 		| UVD_SUVD_CGC_GATE__SITE_MASK
727 		| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
728 		| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
729 		| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
730 		| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
731 		| UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
732 	WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE, data);
733 
734 	data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
735 	data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
736 		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
737 		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
738 		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
739 		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
740 		| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
741 		| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
742 		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
743 		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
744 		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
745 	WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
746 }
747 
748 /**
749  * vcn_v4_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
750  *
751  * @adev: amdgpu_device pointer
752  * @sram_sel: sram select
753  * @inst_idx: instance number index
754  * @indirect: indirectly write sram
755  *
756  * Disable clock gating for VCN block with dpg mode
757  */
758 static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
759       int inst_idx, uint8_t indirect)
760 {
761 	uint32_t reg_data = 0;
762 
763 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
764 		return;
765 
766 	/* enable sw clock gating control */
767 	reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
768 	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
769 	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
770 	reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
771 		 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
772 		 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
773 		 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
774 		 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
775 		 UVD_CGC_CTRL__SYS_MODE_MASK |
776 		 UVD_CGC_CTRL__UDEC_MODE_MASK |
777 		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
778 		 UVD_CGC_CTRL__REGS_MODE_MASK |
779 		 UVD_CGC_CTRL__RBC_MODE_MASK |
780 		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
781 		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
782 		 UVD_CGC_CTRL__IDCT_MODE_MASK |
783 		 UVD_CGC_CTRL__MPRD_MODE_MASK |
784 		 UVD_CGC_CTRL__MPC_MODE_MASK |
785 		 UVD_CGC_CTRL__LBSI_MODE_MASK |
786 		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
787 		 UVD_CGC_CTRL__WCB_MODE_MASK |
788 		 UVD_CGC_CTRL__VCPU_MODE_MASK);
789 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
790 		VCN, inst_idx, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
791 
792 	/* turn off clock gating */
793 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
794 		VCN, inst_idx, regUVD_CGC_GATE), 0, sram_sel, indirect);
795 
796 	/* turn on SUVD clock gating */
797 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
798 		VCN, inst_idx, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
799 
800 	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
801 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
802 		VCN, inst_idx, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
803 }
804 
805 /**
806  * vcn_v4_0_enable_clock_gating - enable VCN clock gating
807  *
808  * @adev: amdgpu_device pointer
809  * @inst: instance number
810  *
811  * Enable clock gating for VCN block
812  */
813 static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
814 {
815 	uint32_t data;
816 
817 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
818 		return;
819 
820 	/* enable VCN CGC */
821 	data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
822 	data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
823 	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
824 	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
825 	WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
826 
827 	data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
828 	data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
829 		| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
830 		| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
831 		| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
832 		| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
833 		| UVD_CGC_CTRL__SYS_MODE_MASK
834 		| UVD_CGC_CTRL__UDEC_MODE_MASK
835 		| UVD_CGC_CTRL__MPEG2_MODE_MASK
836 		| UVD_CGC_CTRL__REGS_MODE_MASK
837 		| UVD_CGC_CTRL__RBC_MODE_MASK
838 		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
839 		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
840 		| UVD_CGC_CTRL__IDCT_MODE_MASK
841 		| UVD_CGC_CTRL__MPRD_MODE_MASK
842 		| UVD_CGC_CTRL__MPC_MODE_MASK
843 		| UVD_CGC_CTRL__LBSI_MODE_MASK
844 		| UVD_CGC_CTRL__LRBBM_MODE_MASK
845 		| UVD_CGC_CTRL__WCB_MODE_MASK
846 		| UVD_CGC_CTRL__VCPU_MODE_MASK
847 		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
848 	WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
849 
850 	data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
851 	data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
852 		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
853 		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
854 		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
855 		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
856 		| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
857 		| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
858 		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
859 		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
860 		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
861 	WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
862 
863 	return;
864 }
865 
866 static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
867 				bool indirect)
868 {
869 	uint32_t tmp;
870 
871 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
872 		return;
873 
874 	tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
875 	      VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
876 	      VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
877 	      VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
878 	WREG32_SOC15_DPG_MODE(inst_idx,
879 			      SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
880 			      tmp, 0, indirect);
881 
882 	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
883 	WREG32_SOC15_DPG_MODE(inst_idx,
884 			      SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
885 			      tmp, 0, indirect);
886 }
887 
/**
 * vcn_v4_0_start_dpg_mode - VCN start with dpg mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number index
 * @indirect: indirectly write sram
 *
 * Start VCN block with dpg mode: switch the instance into dynamic power
 * gating, queue the DPG-mode register writes (pushed through
 * psp_update_vcn_sram() when @indirect is set), then program and enable
 * the unified ring buffer and its doorbell.
 *
 * Return: always 0.
 */
static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
	volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
	struct amdgpu_ring *ring;
	uint32_t tmp;

	/* disable register anti-hang mechanism */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
	/* enable dynamic power gating mode */
	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS);
	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
	WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp);

	/* indirect writes accumulate from the start of the DPG sram buffer */
	if (indirect)
		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;

	/* set up clock gating via the DPG-mode helper (named "disable" clock gating) */
	vcn_v4_0_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);

	/* enable VCPU clock, with the block reset bit set */
	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);

	/* disable master interrupt */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect);

	/* setup regUVD_LMI_CTRL */
	tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
		UVD_LMI_CTRL__REQ_MODE_MASK |
		UVD_LMI_CTRL__CRC_RESET_MASK |
		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
		0x00100000L);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect);

	/* setup regUVD_MPC_CNTL */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, regUVD_MPC_CNTL),
		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);

	/* setup regUVD_MPC_SET_MUXA0 */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, regUVD_MPC_SET_MUXA0),
		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);

	/* setup regUVD_MPC_SET_MUXB0 */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, regUVD_MPC_SET_MUXB0),
		 ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);

	/* setup regUVD_MPC_SET_MUX */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, regUVD_MPC_SET_MUX),
		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);

	vcn_v4_0_mc_resume_dpg_mode(adev, inst_idx, indirect);

	/* rewrite UVD_VCPU_CNTL with the clock enabled and BLK_RST no longer set */
	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);

	/* enable LMI MC and UMC channels */
	tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);

	vcn_v4_0_enable_ras(adev, inst_idx, indirect);

	/* enable master interrupt */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, regUVD_MASTINT_EN),
		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);


	/* hand the accumulated sram contents (size in bytes) to the PSP */
	if (indirect)
		psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
			(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
				(uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));

	ring = &adev->vcn.inst[inst_idx].ring_enc[0];

	/* program the unified ring buffer base and size (size in dwords) */
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);

	/* disable RB1 and flag a ring reset while the pointers are cleared */
	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
	tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);

	/* make the write pointer, and the driver's copy of it, match the read pointer */
	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
	ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);

	/* re-enable RB1 and clear the reset / hold-off flags */
	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
	tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);

	/* enable the RB1 doorbell at the ring's doorbell index */
	WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
			ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
			VCN_RB1_DB_CTRL__EN_MASK);

	return 0;
}
1017 
1018 
1019 /**
1020  * vcn_v4_0_start - VCN start
1021  *
1022  * @adev: amdgpu_device pointer
1023  *
1024  * Start VCN block
1025  */
1026 static int vcn_v4_0_start(struct amdgpu_device *adev)
1027 {
1028 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1029 	struct amdgpu_ring *ring;
1030 	uint32_t tmp;
1031 	int i, j, k, r;
1032 
1033 	if (adev->pm.dpm_enabled)
1034 		amdgpu_dpm_enable_uvd(adev, true);
1035 
1036 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1037 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1038 
1039 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1040 			r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
1041 			continue;
1042 		}
1043 
1044 		/* disable VCN power gating */
1045 		vcn_v4_0_disable_static_power_gating(adev, i);
1046 
1047 		/* set VCN status busy */
1048 		tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
1049 		WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
1050 
1051 		/*SW clock gating */
1052 		vcn_v4_0_disable_clock_gating(adev, i);
1053 
1054 		/* enable VCPU clock */
1055 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
1056 				UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
1057 
1058 		/* disable master interrupt */
1059 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
1060 				~UVD_MASTINT_EN__VCPU_EN_MASK);
1061 
1062 		/* enable LMI MC and UMC channels */
1063 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
1064 				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1065 
1066 		tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
1067 		tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1068 		tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1069 		WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
1070 
1071 		/* setup regUVD_LMI_CTRL */
1072 		tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
1073 		WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
1074 				UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
1075 				UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
1076 				UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
1077 				UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
1078 
1079 		/* setup regUVD_MPC_CNTL */
1080 		tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
1081 		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
1082 		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
1083 		WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
1084 
1085 		/* setup UVD_MPC_SET_MUXA0 */
1086 		WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
1087 				((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
1088 				 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
1089 				 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
1090 				 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
1091 
1092 		/* setup UVD_MPC_SET_MUXB0 */
1093 		WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
1094 				((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
1095 				 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
1096 				 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
1097 				 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
1098 
1099 		/* setup UVD_MPC_SET_MUX */
1100 		WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
1101 				((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
1102 				 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
1103 				 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
1104 
1105 		vcn_v4_0_mc_resume(adev, i);
1106 
1107 		/* VCN global tiling registers */
1108 		WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
1109 				adev->gfx.config.gb_addr_config);
1110 
1111 		/* unblock VCPU register access */
1112 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
1113 				~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1114 
1115 		/* release VCPU reset to boot */
1116 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
1117 				~UVD_VCPU_CNTL__BLK_RST_MASK);
1118 
1119 		for (j = 0; j < 10; ++j) {
1120 			uint32_t status;
1121 
1122 			for (k = 0; k < 100; ++k) {
1123 				status = RREG32_SOC15(VCN, i, regUVD_STATUS);
1124 				if (status & 2)
1125 					break;
1126 				mdelay(10);
1127 				if (amdgpu_emu_mode==1)
1128 					msleep(1);
1129 			}
1130 
1131 			if (amdgpu_emu_mode==1) {
1132 				r = -1;
1133 				if (status & 2) {
1134 					r = 0;
1135 					break;
1136 				}
1137 			} else {
1138 				r = 0;
1139 				if (status & 2)
1140 					break;
1141 
1142 				dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
1143 				WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
1144 							UVD_VCPU_CNTL__BLK_RST_MASK,
1145 							~UVD_VCPU_CNTL__BLK_RST_MASK);
1146 				mdelay(10);
1147 				WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
1148 						~UVD_VCPU_CNTL__BLK_RST_MASK);
1149 
1150 				mdelay(10);
1151 				r = -1;
1152 			}
1153 		}
1154 
1155 		if (r) {
1156 			dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
1157 			return r;
1158 		}
1159 
1160 		/* enable master interrupt */
1161 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
1162 				UVD_MASTINT_EN__VCPU_EN_MASK,
1163 				~UVD_MASTINT_EN__VCPU_EN_MASK);
1164 
1165 		/* clear the busy bit of VCN_STATUS */
1166 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
1167 				~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1168 
1169 		ring = &adev->vcn.inst[i].ring_enc[0];
1170 		WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
1171 				ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
1172 				VCN_RB1_DB_CTRL__EN_MASK);
1173 
1174 		WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
1175 		WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1176 		WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
1177 
1178 		tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
1179 		tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
1180 		WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
1181 		fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
1182 		WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
1183 		WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
1184 
1185 		tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
1186 		WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
1187 		ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
1188 
1189 		tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
1190 		tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
1191 		WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
1192 		fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
1193 	}
1194 
1195 	return 0;
1196 }
1197 
/**
 * vcn_v4_0_start_sriov - start VCN through the MMSCH init table
 *
 * @adev: amdgpu_device pointer
 *
 * Builds an init table in adev->virt.mm_table: a header followed by one
 * command list per non-harvested VCN instance. The table is then handed to
 * the MMSCH through the MMSCH_VF_* registers of VCN instance 0 and the
 * mailbox response is polled.
 *
 * Return: -EBUSY when no mailbox response arrives within the polling
 * budget, 0 otherwise (an unexpected response/init status is only logged).
 */
static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
{
	int i;
	struct amdgpu_ring *ring_enc;
	uint64_t cache_addr;
	uint64_t rb_enc_addr;
	uint64_t ctx_addr;
	uint32_t param, resp, expected;
	uint32_t offset, cache_size;
	uint32_t tmp, timeout;

	struct amdgpu_mm_table *table = &adev->virt.mm_table;
	uint32_t *table_loc;
	uint32_t table_size;
	uint32_t size, size_dw;
	uint32_t init_status;
	uint32_t enabled_vcn;

	/*
	 * NOTE(review): the MMSCH_V4_0_INSERT_* macros are defined outside this
	 * chunk; presumably they fill these command structs, copy them to
	 * table_loc and update table_size via size/size_dw -- confirm before
	 * renaming any of these locals.
	 */
	struct mmsch_v4_0_cmd_direct_write
		direct_wt = { {0} };
	struct mmsch_v4_0_cmd_direct_read_modify_write
		direct_rd_mod_wt = { {0} };
	struct mmsch_v4_0_cmd_end end = { {0} };
	struct mmsch_v4_0_init_header header;

	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
	volatile struct amdgpu_fw_shared_rb_setup *rb_setup;

	direct_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	end.cmd_header.command_type =
		MMSCH_COMMAND__END;

	/* total_size is kept in dwords and starts out as the header alone */
	header.version = MMSCH_VERSION;
	header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2;
	for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
		header.inst[i].init_status = 0;
		header.inst[i].table_offset = 0;
		header.inst[i].table_size = 0;
	}

	/* the per-instance command lists start right behind the header */
	table_loc = (uint32_t *)table->cpu_addr;
	table_loc += header.total_size;
	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		table_size = 0;

		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_STATUS),
			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);

		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);

		/* cache window 0: firmware, from the PSP TMR or from the VCN BO */
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
			offset = 0;
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_VCPU_CACHE_OFFSET0),
				0);
		} else {
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				lower_32_bits(adev->vcn.inst[i].gpu_addr));
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				upper_32_bits(adev->vcn.inst[i].gpu_addr));
			offset = cache_size;
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_VCPU_CACHE_OFFSET0),
				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
		}

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_CACHE_SIZE0),
			cache_size);

		/* cache window 1: AMDGPU_VCN_STACK_SIZE bytes at gpu_addr + offset */
		cache_addr = adev->vcn.inst[i].gpu_addr + offset;
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
			lower_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
			upper_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_CACHE_OFFSET1),
			0);
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_CACHE_SIZE1),
			AMDGPU_VCN_STACK_SIZE);

		/* cache window 2: AMDGPU_VCN_CONTEXT_SIZE bytes right behind window 1 */
		cache_addr = adev->vcn.inst[i].gpu_addr + offset +
			AMDGPU_VCN_STACK_SIZE;
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
			lower_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
			upper_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_CACHE_OFFSET2),
			0);
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_CACHE_SIZE2),
			AMDGPU_VCN_CONTEXT_SIZE);

		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
		rb_setup = &fw_shared->rb_setup;

		ring_enc = &adev->vcn.inst[i].ring_enc[0];
		ring_enc->wptr = 0;
		rb_enc_addr = ring_enc->gpu_addr;

		/* describe the ring buffer in the fw_shared area (size in dwords) */
		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
		rb_setup->rb_size = ring_enc->ring_size / 4;
		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);

		/* non-cached window 0: the fw_shared structure */
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_NONCACHE_SIZE0),
			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));

		/* add end packet */
		MMSCH_V4_0_INSERT_END();

		/* refine header: record where this instance's list lives */
		header.inst[i].init_status = 0;
		header.inst[i].table_offset = header.total_size;
		header.inst[i].table_size = table_size;
		header.total_size += table_size;
	}

	/* Update init table header in memory */
	size = sizeof(struct mmsch_v4_0_init_header);
	table_loc = (uint32_t *)table->cpu_addr;
	memcpy((void *)table_loc, &header, size);

	/* message MMSCH (in VCN[0]) to initialize this client
	 * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
	 * of memory descriptor location
	 */
	ctx_addr = table->gpu_addr;
	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));

	/* 2, update vmid of descriptor */
	tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
	tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	/* use domain0 for MM scheduler */
	tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
	WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);

	/* 3, notify mmsch about the size of this descriptor */
	size = header.total_size;
	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);

	/* 4, set resp to zero */
	WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);

	/* 5, kick off the initialization and wait until
	 * MMSCH_VF_MAILBOX_RESP becomes non-zero
	 */
	param = 0x00000001;
	WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
	/* tmp counts the delay in steps of 10 (one per udelay(10)) up to timeout */
	tmp = 0;
	timeout = 1000;
	resp = 0;
	expected = MMSCH_VF_MAILBOX_RESP__OK;
	while (resp != expected) {
		resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
		/* any non-zero response ends the wait; it is judged below */
		if (resp != 0)
			break;

		udelay(10);
		tmp = tmp + 10;
		if (tmp >= timeout) {
			DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
				" waiting for regMMSCH_VF_MAILBOX_RESP "\
				"(expected=0x%08x, readback=0x%08x)\n",
				tmp, expected, resp);
			return -EBUSY;
		}
	}
	/* read the init status of instance 1 when instance 0 is reported disabled */
	enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
	init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->inst[enabled_vcn].init_status;
	/* a bad response is only reported; the function still returns 0 */
	if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
	&& init_status != MMSCH_VF_ENGINE_STATUS__PASS)
		DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
			"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);

	return 0;
}
1405 
1406 /**
1407  * vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode
1408  *
1409  * @adev: amdgpu_device pointer
1410  * @inst_idx: instance number index
1411  *
1412  * Stop VCN block with dpg mode
1413  */
1414 static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
1415 {
1416 	uint32_t tmp;
1417 
1418 	/* Wait for power status to be 1 */
1419 	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
1420 		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1421 
1422 	/* wait for read ptr to be equal to write ptr */
1423 	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
1424 	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
1425 
1426 	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
1427 		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1428 
1429 	/* disable dynamic power gating mode */
1430 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
1431 		~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
1432 }
1433 
1434 /**
1435  * vcn_v4_0_stop - VCN stop
1436  *
1437  * @adev: amdgpu_device pointer
1438  *
1439  * Stop VCN block
1440  */
1441 static int vcn_v4_0_stop(struct amdgpu_device *adev)
1442 {
1443 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1444 	uint32_t tmp;
1445 	int i, r = 0;
1446 
1447 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1448 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1449 		fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
1450 
1451 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1452 			vcn_v4_0_stop_dpg_mode(adev, i);
1453 			continue;
1454 		}
1455 
1456 		/* wait for vcn idle */
1457 		r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
1458 		if (r)
1459 			return r;
1460 
1461 		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
1462 			UVD_LMI_STATUS__READ_CLEAN_MASK |
1463 			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
1464 			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
1465 		r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
1466 		if (r)
1467 			return r;
1468 
1469 		/* disable LMI UMC channel */
1470 		tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
1471 		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
1472 		WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
1473 		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
1474 			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
1475 		r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
1476 		if (r)
1477 			return r;
1478 
1479 		/* block VCPU register access */
1480 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
1481 				UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
1482 				~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1483 
1484 		/* reset VCPU */
1485 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
1486 				UVD_VCPU_CNTL__BLK_RST_MASK,
1487 				~UVD_VCPU_CNTL__BLK_RST_MASK);
1488 
1489 		/* disable VCPU clock */
1490 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
1491 				~(UVD_VCPU_CNTL__CLK_EN_MASK));
1492 
1493 		/* apply soft reset */
1494 		tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
1495 		tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1496 		WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
1497 		tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
1498 		tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1499 		WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
1500 
1501 		/* clear status */
1502 		WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
1503 
1504 		/* apply HW clock gating */
1505 		vcn_v4_0_enable_clock_gating(adev, i);
1506 
1507 		/* enable VCN power gating */
1508 		vcn_v4_0_enable_static_power_gating(adev, i);
1509 	}
1510 
1511 	if (adev->pm.dpm_enabled)
1512 		amdgpu_dpm_enable_uvd(adev, false);
1513 
1514 	return 0;
1515 }
1516 
1517 /**
1518  * vcn_v4_0_pause_dpg_mode - VCN pause with dpg mode
1519  *
1520  * @adev: amdgpu_device pointer
1521  * @inst_idx: instance number index
1522  * @new_state: pause state
1523  *
1524  * Pause dpg mode for VCN block
1525  */
1526 static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
1527       struct dpg_pause_state *new_state)
1528 {
1529 	uint32_t reg_data = 0;
1530 	int ret_code;
1531 
1532 	/* pause/unpause if state is changed */
1533 	if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
1534 		DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d",
1535 			adev->vcn.inst[inst_idx].pause_state.fw_based,	new_state->fw_based);
1536 		reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) &
1537 			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1538 
1539 		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
1540 			ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
1541 				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1542 
1543 			if (!ret_code) {
1544 				/* pause DPG */
1545 				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1546 				WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
1547 
1548 				/* wait for ACK */
1549 				SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE,
1550 					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
1551 					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1552 
1553 				SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS,
1554 					UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1555 			}
1556 		} else {
1557 			/* unpause dpg, no need to wait */
1558 			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1559 			WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
1560 		}
1561 		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
1562 	}
1563 
1564 	return 0;
1565 }
1566 
1567 /**
1568  * vcn_v4_0_unified_ring_get_rptr - get unified read pointer
1569  *
1570  * @ring: amdgpu_ring pointer
1571  *
1572  * Returns the current hardware unified read pointer
1573  */
1574 static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
1575 {
1576 	struct amdgpu_device *adev = ring->adev;
1577 
1578 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1579 		DRM_ERROR("wrong ring id is identified in %s", __func__);
1580 
1581 	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
1582 }
1583 
1584 /**
1585  * vcn_v4_0_unified_ring_get_wptr - get unified write pointer
1586  *
1587  * @ring: amdgpu_ring pointer
1588  *
1589  * Returns the current hardware unified write pointer
1590  */
1591 static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
1592 {
1593 	struct amdgpu_device *adev = ring->adev;
1594 
1595 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1596 		DRM_ERROR("wrong ring id is identified in %s", __func__);
1597 
1598 	if (ring->use_doorbell)
1599 		return *ring->wptr_cpu_addr;
1600 	else
1601 		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
1602 }
1603 
1604 /**
1605  * vcn_v4_0_unified_ring_set_wptr - set enc write pointer
1606  *
1607  * @ring: amdgpu_ring pointer
1608  *
1609  * Commits the enc write pointer to the hardware
1610  */
1611 static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
1612 {
1613 	struct amdgpu_device *adev = ring->adev;
1614 
1615 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1616 		DRM_ERROR("wrong ring id is identified in %s", __func__);
1617 
1618 	if (ring->use_doorbell) {
1619 		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
1620 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1621 	} else {
1622 		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
1623 	}
1624 }
1625 
1626 static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
1627 				struct amdgpu_job *job)
1628 {
1629 	struct drm_gpu_scheduler **scheds;
1630 
1631 	/* The create msg must be in the first IB submitted */
1632 	if (atomic_read(&job->base.entity->fence_seq))
1633 		return -EINVAL;
1634 
1635 	scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
1636 		[AMDGPU_RING_PRIO_0].sched;
1637 	drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
1638 	return 0;
1639 }
1640 
1641 static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
1642 			    uint64_t addr)
1643 {
1644 	struct ttm_operation_ctx ctx = { false, false };
1645 	struct amdgpu_bo_va_mapping *map;
1646 	uint32_t *msg, num_buffers;
1647 	struct amdgpu_bo *bo;
1648 	uint64_t start, end;
1649 	unsigned int i;
1650 	void *ptr;
1651 	int r;
1652 
1653 	addr &= AMDGPU_GMC_HOLE_MASK;
1654 	r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
1655 	if (r) {
1656 		DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
1657 		return r;
1658 	}
1659 
1660 	start = map->start * AMDGPU_GPU_PAGE_SIZE;
1661 	end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
1662 	if (addr & 0x7) {
1663 		DRM_ERROR("VCN messages must be 8 byte aligned!\n");
1664 		return -EINVAL;
1665 	}
1666 
1667 	bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
1668 	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
1669 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1670 	if (r) {
1671 		DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
1672 		return r;
1673 	}
1674 
1675 	r = amdgpu_bo_kmap(bo, &ptr);
1676 	if (r) {
1677 		DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
1678 		return r;
1679 	}
1680 
1681 	msg = ptr + addr - start;
1682 
1683 	/* Check length */
1684 	if (msg[1] > end - addr) {
1685 		r = -EINVAL;
1686 		goto out;
1687 	}
1688 
1689 	if (msg[3] != RDECODE_MSG_CREATE)
1690 		goto out;
1691 
1692 	num_buffers = msg[2];
1693 	for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
1694 		uint32_t offset, size, *create;
1695 
1696 		if (msg[0] != RDECODE_MESSAGE_CREATE)
1697 			continue;
1698 
1699 		offset = msg[1];
1700 		size = msg[2];
1701 
1702 		if (offset + size > end) {
1703 			r = -EINVAL;
1704 			goto out;
1705 		}
1706 
1707 		create = ptr + addr + offset - start;
1708 
1709 		/* H246, HEVC and VP9 can run on any instance */
1710 		if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
1711 			continue;
1712 
1713 		r = vcn_v4_0_limit_sched(p, job);
1714 		if (r)
1715 			goto out;
1716 	}
1717 
1718 out:
1719 	amdgpu_bo_kunmap(bo);
1720 	return r;
1721 }
1722 
1723 #define RADEON_VCN_ENGINE_TYPE_DECODE                                 (0x00000003)
1724 
1725 static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
1726 					   struct amdgpu_job *job,
1727 					   struct amdgpu_ib *ib)
1728 {
1729 	struct amdgpu_ring *ring = amdgpu_job_ring(job);
1730 	struct amdgpu_vcn_decode_buffer *decode_buffer;
1731 	uint64_t addr;
1732 	uint32_t val;
1733 
1734 	/* The first instance can decode anything */
1735 	if (!ring->me)
1736 		return 0;
1737 
1738 	/* unified queue ib header has 8 double words. */
1739 	if (ib->length_dw < 8)
1740 		return 0;
1741 
1742 	val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE
1743 	if (val != RADEON_VCN_ENGINE_TYPE_DECODE)
1744 		return 0;
1745 
1746 	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
1747 
1748 	if (!(decode_buffer->valid_buf_flag  & 0x1))
1749 		return 0;
1750 
1751 	addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
1752 		decode_buffer->msg_buffer_address_lo;
1753 	return vcn_v4_0_dec_msg(p, job, addr);
1754 }
1755 
1756 static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
1757 	.type = AMDGPU_RING_TYPE_VCN_ENC,
1758 	.align_mask = 0x3f,
1759 	.nop = VCN_ENC_CMD_NO_OP,
1760 	.vmhub = AMDGPU_MMHUB_0,
1761 	.get_rptr = vcn_v4_0_unified_ring_get_rptr,
1762 	.get_wptr = vcn_v4_0_unified_ring_get_wptr,
1763 	.set_wptr = vcn_v4_0_unified_ring_set_wptr,
1764 	.patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place,
1765 	.emit_frame_size =
1766 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1767 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1768 		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
1769 		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
1770 		1, /* vcn_v2_0_enc_ring_insert_end */
1771 	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
1772 	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
1773 	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
1774 	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
1775 	.test_ring = amdgpu_vcn_enc_ring_test_ring,
1776 	.test_ib = amdgpu_vcn_unified_ring_test_ib,
1777 	.insert_nop = amdgpu_ring_insert_nop,
1778 	.insert_end = vcn_v2_0_enc_ring_insert_end,
1779 	.pad_ib = amdgpu_ring_generic_pad_ib,
1780 	.begin_use = amdgpu_vcn_ring_begin_use,
1781 	.end_use = amdgpu_vcn_ring_end_use,
1782 	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
1783 	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
1784 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1785 };
1786 
1787 /**
1788  * vcn_v4_0_set_unified_ring_funcs - set unified ring functions
1789  *
1790  * @adev: amdgpu_device pointer
1791  *
1792  * Set unified ring functions
1793  */
1794 static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
1795 {
1796 	int i;
1797 
1798 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1799 		if (adev->vcn.harvest_config & (1 << i))
1800 			continue;
1801 
1802 		adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs;
1803 		adev->vcn.inst[i].ring_enc[0].me = i;
1804 
1805 		DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i);
1806 	}
1807 }
1808 
1809 /**
1810  * vcn_v4_0_is_idle - check VCN block is idle
1811  *
1812  * @handle: amdgpu_device pointer
1813  *
1814  * Check whether VCN block is idle
1815  */
1816 static bool vcn_v4_0_is_idle(void *handle)
1817 {
1818 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1819 	int i, ret = 1;
1820 
1821 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1822 		if (adev->vcn.harvest_config & (1 << i))
1823 			continue;
1824 
1825 		ret &= (RREG32_SOC15(VCN, i, regUVD_STATUS) == UVD_STATUS__IDLE);
1826 	}
1827 
1828 	return ret;
1829 }
1830 
1831 /**
1832  * vcn_v4_0_wait_for_idle - wait for VCN block idle
1833  *
1834  * @handle: amdgpu_device pointer
1835  *
1836  * Wait for VCN block idle
1837  */
1838 static int vcn_v4_0_wait_for_idle(void *handle)
1839 {
1840 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1841 	int i, ret = 0;
1842 
1843 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1844 		if (adev->vcn.harvest_config & (1 << i))
1845 			continue;
1846 
1847 		ret = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE,
1848 			UVD_STATUS__IDLE);
1849 		if (ret)
1850 			return ret;
1851 	}
1852 
1853 	return ret;
1854 }
1855 
1856 /**
1857  * vcn_v4_0_set_clockgating_state - set VCN block clockgating state
1858  *
1859  * @handle: amdgpu_device pointer
1860  * @state: clock gating state
1861  *
1862  * Set VCN block clockgating state
1863  */
1864 static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
1865 {
1866 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1867 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
1868 	int i;
1869 
1870 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1871 		if (adev->vcn.harvest_config & (1 << i))
1872 			continue;
1873 
1874 		if (enable) {
1875 			if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
1876 				return -EBUSY;
1877 			vcn_v4_0_enable_clock_gating(adev, i);
1878 		} else {
1879 			vcn_v4_0_disable_clock_gating(adev, i);
1880 		}
1881 	}
1882 
1883 	return 0;
1884 }
1885 
1886 /**
1887  * vcn_v4_0_set_powergating_state - set VCN block powergating state
1888  *
1889  * @handle: amdgpu_device pointer
1890  * @state: power gating state
1891  *
1892  * Set VCN block powergating state
1893  */
1894 static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state)
1895 {
1896 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1897 	int ret;
1898 
1899 	/* for SRIOV, guest should not control VCN Power-gating
1900 	 * MMSCH FW should control Power-gating and clock-gating
1901 	 * guest should avoid touching CGC and PG
1902 	 */
1903 	if (amdgpu_sriov_vf(adev)) {
1904 		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
1905 		return 0;
1906 	}
1907 
1908 	if(state == adev->vcn.cur_state)
1909 		return 0;
1910 
1911 	if (state == AMD_PG_STATE_GATE)
1912 		ret = vcn_v4_0_stop(adev);
1913 	else
1914 		ret = vcn_v4_0_start(adev);
1915 
1916 	if(!ret)
1917 		adev->vcn.cur_state = state;
1918 
1919 	return ret;
1920 }
1921 
1922 /**
1923  * vcn_v4_0_set_interrupt_state - set VCN block interrupt state
1924  *
1925  * @adev: amdgpu_device pointer
1926  * @source: interrupt sources
1927  * @type: interrupt types
1928  * @state: interrupt states
1929  *
1930  * Set VCN block interrupt state
1931  */
1932 static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
1933       unsigned type, enum amdgpu_interrupt_state state)
1934 {
1935 	return 0;
1936 }
1937 
1938 /**
1939  * vcn_v4_0_process_interrupt - process VCN block interrupt
1940  *
1941  * @adev: amdgpu_device pointer
1942  * @source: interrupt sources
1943  * @entry: interrupt entry from clients and sources
1944  *
1945  * Process VCN block interrupt
1946  */
1947 static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
1948       struct amdgpu_iv_entry *entry)
1949 {
1950 	uint32_t ip_instance;
1951 
1952 	switch (entry->client_id) {
1953 	case SOC15_IH_CLIENTID_VCN:
1954 		ip_instance = 0;
1955 		break;
1956 	case SOC15_IH_CLIENTID_VCN1:
1957 		ip_instance = 1;
1958 		break;
1959 	default:
1960 		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
1961 		return 0;
1962 	}
1963 
1964 	DRM_DEBUG("IH: VCN TRAP\n");
1965 
1966 	switch (entry->src_id) {
1967 	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
1968 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
1969 		break;
1970 	case VCN_4_0__SRCID_UVD_POISON:
1971 		amdgpu_vcn_process_poison_irq(adev, source, entry);
1972 		break;
1973 	default:
1974 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1975 			  entry->src_id, entry->src_data[0]);
1976 		break;
1977 	}
1978 
1979 	return 0;
1980 }
1981 
1982 static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = {
1983 	.set = vcn_v4_0_set_interrupt_state,
1984 	.process = vcn_v4_0_process_interrupt,
1985 };
1986 
1987 /**
1988  * vcn_v4_0_set_irq_funcs - set VCN block interrupt irq functions
1989  *
1990  * @adev: amdgpu_device pointer
1991  *
1992  * Set VCN block interrupt irq functions
1993  */
1994 static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1995 {
1996 	int i;
1997 
1998 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1999 		if (adev->vcn.harvest_config & (1 << i))
2000 			continue;
2001 
2002 		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
2003 		adev->vcn.inst[i].irq.funcs = &vcn_v4_0_irq_funcs;
2004 	}
2005 }
2006 
2007 static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
2008 	.name = "vcn_v4_0",
2009 	.early_init = vcn_v4_0_early_init,
2010 	.late_init = NULL,
2011 	.sw_init = vcn_v4_0_sw_init,
2012 	.sw_fini = vcn_v4_0_sw_fini,
2013 	.hw_init = vcn_v4_0_hw_init,
2014 	.hw_fini = vcn_v4_0_hw_fini,
2015 	.suspend = vcn_v4_0_suspend,
2016 	.resume = vcn_v4_0_resume,
2017 	.is_idle = vcn_v4_0_is_idle,
2018 	.wait_for_idle = vcn_v4_0_wait_for_idle,
2019 	.check_soft_reset = NULL,
2020 	.pre_soft_reset = NULL,
2021 	.soft_reset = NULL,
2022 	.post_soft_reset = NULL,
2023 	.set_clockgating_state = vcn_v4_0_set_clockgating_state,
2024 	.set_powergating_state = vcn_v4_0_set_powergating_state,
2025 };
2026 
2027 const struct amdgpu_ip_block_version vcn_v4_0_ip_block =
2028 {
2029 	.type = AMD_IP_BLOCK_TYPE_VCN,
2030 	.major = 4,
2031 	.minor = 0,
2032 	.rev = 0,
2033 	.funcs = &vcn_v4_0_ip_funcs,
2034 };
2035 
2036 static uint32_t vcn_v4_0_query_poison_by_instance(struct amdgpu_device *adev,
2037 			uint32_t instance, uint32_t sub_block)
2038 {
2039 	uint32_t poison_stat = 0, reg_value = 0;
2040 
2041 	switch (sub_block) {
2042 	case AMDGPU_VCN_V4_0_VCPU_VCODEC:
2043 		reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS);
2044 		poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
2045 		break;
2046 	default:
2047 		break;
2048 	}
2049 
2050 	if (poison_stat)
2051 		dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
2052 			instance, sub_block);
2053 
2054 	return poison_stat;
2055 }
2056 
2057 static bool vcn_v4_0_query_ras_poison_status(struct amdgpu_device *adev)
2058 {
2059 	uint32_t inst, sub;
2060 	uint32_t poison_stat = 0;
2061 
2062 	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
2063 		for (sub = 0; sub < AMDGPU_VCN_V4_0_MAX_SUB_BLOCK; sub++)
2064 			poison_stat +=
2065 				vcn_v4_0_query_poison_by_instance(adev, inst, sub);
2066 
2067 	return !!poison_stat;
2068 }
2069 
2070 const struct amdgpu_ras_block_hw_ops vcn_v4_0_ras_hw_ops = {
2071 	.query_poison_status = vcn_v4_0_query_ras_poison_status,
2072 };
2073 
2074 static struct amdgpu_vcn_ras vcn_v4_0_ras = {
2075 	.ras_block = {
2076 		.hw_ops = &vcn_v4_0_ras_hw_ops,
2077 	},
2078 };
2079 
2080 static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev)
2081 {
2082 	switch (adev->ip_versions[VCN_HWIP][0]) {
2083 	case IP_VERSION(4, 0, 0):
2084 		adev->vcn.ras = &vcn_v4_0_ras;
2085 		break;
2086 	default:
2087 		break;
2088 	}
2089 
2090 	amdgpu_vcn_set_ras_funcs(adev);
2091 }
2092