1c1dc356aSLeo Liu /*
2c1dc356aSLeo Liu * Copyright 2016 Advanced Micro Devices, Inc.
3c1dc356aSLeo Liu * All Rights Reserved.
4c1dc356aSLeo Liu *
5c1dc356aSLeo Liu * Permission is hereby granted, free of charge, to any person obtaining a
6c1dc356aSLeo Liu * copy of this software and associated documentation files (the
7c1dc356aSLeo Liu * "Software"), to deal in the Software without restriction, including
8c1dc356aSLeo Liu * without limitation the rights to use, copy, modify, merge, publish,
9c1dc356aSLeo Liu * distribute, sub license, and/or sell copies of the Software, and to
10c1dc356aSLeo Liu * permit persons to whom the Software is furnished to do so, subject to
11c1dc356aSLeo Liu * the following conditions:
12c1dc356aSLeo Liu *
13c1dc356aSLeo Liu * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14c1dc356aSLeo Liu * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15c1dc356aSLeo Liu * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16c1dc356aSLeo Liu * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17c1dc356aSLeo Liu * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18c1dc356aSLeo Liu * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19c1dc356aSLeo Liu * USE OR OTHER DEALINGS IN THE SOFTWARE.
20c1dc356aSLeo Liu *
21c1dc356aSLeo Liu * The above copyright notice and this permission notice (including the
22c1dc356aSLeo Liu * next paragraph) shall be included in all copies or substantial portions
23c1dc356aSLeo Liu * of the Software.
24c1dc356aSLeo Liu *
25c1dc356aSLeo Liu */
26c1dc356aSLeo Liu
27c1dc356aSLeo Liu #include <linux/firmware.h>
28f89f8c6bSAndrey Grodzovsky #include <drm/drm_drv.h>
2947b757fbSSam Ravnborg
30c1dc356aSLeo Liu #include "amdgpu.h"
31c1dc356aSLeo Liu #include "amdgpu_vce.h"
329096d6e5SChristian König #include "soc15.h"
33c1dc356aSLeo Liu #include "soc15d.h"
34c1dc356aSLeo Liu #include "soc15_common.h"
35bf4305feSXiangliang Yu #include "mmsch_v1_0.h"
36c1dc356aSLeo Liu
3718297a21SFeifei Xu #include "vce/vce_4_0_offset.h"
3818297a21SFeifei Xu #include "vce/vce_4_0_default.h"
3918297a21SFeifei Xu #include "vce/vce_4_0_sh_mask.h"
4065417d9fSFeifei Xu #include "mmhub/mmhub_1_0_offset.h"
4165417d9fSFeifei Xu #include "mmhub/mmhub_1_0_sh_mask.h"
42c1dc356aSLeo Liu
4344a99b65SAndrey Grodzovsky #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
4444a99b65SAndrey Grodzovsky
45c1dc356aSLeo Liu #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
46c1dc356aSLeo Liu
47c1dc356aSLeo Liu #define VCE_V4_0_FW_SIZE (384 * 1024)
48c1dc356aSLeo Liu #define VCE_V4_0_STACK_SIZE (64 * 1024)
49c1dc356aSLeo Liu #define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
50c1dc356aSLeo Liu
51c1dc356aSLeo Liu static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
52c1dc356aSLeo Liu static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
53c1dc356aSLeo Liu static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54c1dc356aSLeo Liu
55c1dc356aSLeo Liu /**
56c1dc356aSLeo Liu * vce_v4_0_ring_get_rptr - get read pointer
57c1dc356aSLeo Liu *
58c1dc356aSLeo Liu * @ring: amdgpu_ring pointer
59c1dc356aSLeo Liu *
60c1dc356aSLeo Liu * Returns the current hardware read pointer
61c1dc356aSLeo Liu */
vce_v4_0_ring_get_rptr(struct amdgpu_ring * ring)62c1dc356aSLeo Liu static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
63c1dc356aSLeo Liu {
64c1dc356aSLeo Liu struct amdgpu_device *adev = ring->adev;
65c1dc356aSLeo Liu
665d4af988SAlex Deucher if (ring->me == 0)
67c1dc356aSLeo Liu return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
685d4af988SAlex Deucher else if (ring->me == 1)
69c1dc356aSLeo Liu return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
70c1dc356aSLeo Liu else
71c1dc356aSLeo Liu return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
72c1dc356aSLeo Liu }
73c1dc356aSLeo Liu
74c1dc356aSLeo Liu /**
75c1dc356aSLeo Liu * vce_v4_0_ring_get_wptr - get write pointer
76c1dc356aSLeo Liu *
77c1dc356aSLeo Liu * @ring: amdgpu_ring pointer
78c1dc356aSLeo Liu *
79c1dc356aSLeo Liu * Returns the current hardware write pointer
80c1dc356aSLeo Liu */
vce_v4_0_ring_get_wptr(struct amdgpu_ring * ring)81c1dc356aSLeo Liu static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
82c1dc356aSLeo Liu {
83c1dc356aSLeo Liu struct amdgpu_device *adev = ring->adev;
84c1dc356aSLeo Liu
85bae5b519SXiangliang Yu if (ring->use_doorbell)
863748424bSJack Xiao return *ring->wptr_cpu_addr;
87bae5b519SXiangliang Yu
885d4af988SAlex Deucher if (ring->me == 0)
89c1dc356aSLeo Liu return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
905d4af988SAlex Deucher else if (ring->me == 1)
91c1dc356aSLeo Liu return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
92c1dc356aSLeo Liu else
93c1dc356aSLeo Liu return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
94c1dc356aSLeo Liu }
95c1dc356aSLeo Liu
96c1dc356aSLeo Liu /**
97c1dc356aSLeo Liu * vce_v4_0_ring_set_wptr - set write pointer
98c1dc356aSLeo Liu *
99c1dc356aSLeo Liu * @ring: amdgpu_ring pointer
100c1dc356aSLeo Liu *
101c1dc356aSLeo Liu * Commits the write pointer to the hardware
102c1dc356aSLeo Liu */
vce_v4_0_ring_set_wptr(struct amdgpu_ring * ring)103c1dc356aSLeo Liu static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
104c1dc356aSLeo Liu {
105c1dc356aSLeo Liu struct amdgpu_device *adev = ring->adev;
106c1dc356aSLeo Liu
107bae5b519SXiangliang Yu if (ring->use_doorbell) {
108bae5b519SXiangliang Yu /* XXX check if swapping is necessary on BE */
1093748424bSJack Xiao *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
110bae5b519SXiangliang Yu WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
111bae5b519SXiangliang Yu return;
112bae5b519SXiangliang Yu }
113bae5b519SXiangliang Yu
1145d4af988SAlex Deucher if (ring->me == 0)
115c1dc356aSLeo Liu WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
116c1dc356aSLeo Liu lower_32_bits(ring->wptr));
1175d4af988SAlex Deucher else if (ring->me == 1)
118c1dc356aSLeo Liu WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
119c1dc356aSLeo Liu lower_32_bits(ring->wptr));
120c1dc356aSLeo Liu else
121c1dc356aSLeo Liu WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
122c1dc356aSLeo Liu lower_32_bits(ring->wptr));
123c1dc356aSLeo Liu }
124c1dc356aSLeo Liu
vce_v4_0_firmware_loaded(struct amdgpu_device * adev)125c1dc356aSLeo Liu static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
126c1dc356aSLeo Liu {
127c1dc356aSLeo Liu int i, j;
128c1dc356aSLeo Liu
129c1dc356aSLeo Liu for (i = 0; i < 10; ++i) {
130c1dc356aSLeo Liu for (j = 0; j < 100; ++j) {
131c1dc356aSLeo Liu uint32_t status =
132c1dc356aSLeo Liu RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
133c1dc356aSLeo Liu
134c1dc356aSLeo Liu if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
135c1dc356aSLeo Liu return 0;
136c1dc356aSLeo Liu mdelay(10);
137c1dc356aSLeo Liu }
138c1dc356aSLeo Liu
139c1dc356aSLeo Liu DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
140c1dc356aSLeo Liu WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
141c1dc356aSLeo Liu VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
142c1dc356aSLeo Liu ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143c1dc356aSLeo Liu mdelay(10);
144c1dc356aSLeo Liu WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
145c1dc356aSLeo Liu ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
146c1dc356aSLeo Liu mdelay(10);
147c1dc356aSLeo Liu
148c1dc356aSLeo Liu }
149c1dc356aSLeo Liu
150c1dc356aSLeo Liu return -ETIMEDOUT;
151c1dc356aSLeo Liu }
152c1dc356aSLeo Liu
/*
 * vce_v4_0_mmsch_start - hand the init descriptor table to the MMSCH (SRIOV)
 *
 * @adev: amdgpu_device pointer
 * @table: MM table holding the descriptor built by vce_v4_0_sriov_start()
 *
 * Programs the MM scheduler firmware with the location, VMID and size of
 * the init descriptor, kicks off initialization, then polls the mailbox
 * response register (up to 1000 * 10us).  Returns 0 on success, -EBUSY if
 * the MMSCH never acknowledges.
 */
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	/* total descriptor size in dwords: header plus both engine tables */
	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* clear ring 0 doorbell and write-pointer state before the MMSCH
	 * re-initializes the engine
	 */
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	*adev->vce.ring[0].wptr_cpu_addr = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	/* poll for the "done" bits (0x10000002) in the mailbox response */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
204bf4305feSXiangliang Yu
/*
 * vce_v4_0_sriov_start - build the MMSCH init table and start VCE (SRIOV)
 *
 * @adev: amdgpu_device pointer
 *
 * Under SRIOV the host's MM scheduler programs the VCE registers on our
 * behalf: instead of writing registers directly, this function appends
 * direct-write / read-modify-write / poll commands describing the ring,
 * MC and boot setup to the shared MM table, then hands the table to the
 * MMSCH via vce_v4_0_mmsch_start().  The table is only built once
 * (guarded by vce_table_offset/size being zero).
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* only build the VCE table if it has not been populated yet */
	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* the VCE table follows the UVD table if one exists */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* ring buffer location and size (only ring 0 is used in SRIOV) */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGINNING OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			/* PSP load: firmware lives in the TMR region */
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			/* driver load: firmware is in the VCE BO */
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		/* stack and data caches always come from the VCE BO */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
					(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
					(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* with PSP load the FW is not in the BO, so the stack starts at 0 */
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		/* boot sequence: mark busy, enable clock, release ECPU reset */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* have the MMSCH wait for the firmware-loaded report */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}
328bf4305feSXiangliang Yu
/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block: program all three ring buffers,
 * restore the memory controller setup, enable the VCPU clock and
 * release the ECPU from reset, then wait for the firmware to report
 * loaded.  Returns 0 on success, negative error if the firmware
 * never comes up.
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	/* program ring 0 location/size; rptr is seeded from wptr so the
	 * ring starts out empty
	 */
	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	/* ring 1 */
	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	/* ring 2 */
	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	/* mark the engine busy while it boots */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	/* enable the VCPU clock */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	/* release the ECPU from soft reset and let the firmware boot */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}
387c1dc356aSLeo Liu
/*
 * vce_v4_0_stop - halt the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU, holds the ECPU in soft reset and clears the
 * status register.  Always returns 0.
 */
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}
409c1dc356aSLeo Liu
vce_v4_0_early_init(void * handle)410c1dc356aSLeo Liu static int vce_v4_0_early_init(void *handle)
411c1dc356aSLeo Liu {
412c1dc356aSLeo Liu struct amdgpu_device *adev = (struct amdgpu_device *)handle;
413c1dc356aSLeo Liu
414bae5b519SXiangliang Yu if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
415bae5b519SXiangliang Yu adev->vce.num_rings = 1;
416bae5b519SXiangliang Yu else
417c1dc356aSLeo Liu adev->vce.num_rings = 3;
418c1dc356aSLeo Liu
419c1dc356aSLeo Liu vce_v4_0_set_ring_funcs(adev);
420c1dc356aSLeo Liu vce_v4_0_set_irq_funcs(adev);
421c1dc356aSLeo Liu
422c1dc356aSLeo Liu return 0;
423c1dc356aSLeo Liu }
424c1dc356aSLeo Liu
vce_v4_0_sw_init(void * handle)425c1dc356aSLeo Liu static int vce_v4_0_sw_init(void *handle)
426c1dc356aSLeo Liu {
427c1dc356aSLeo Liu struct amdgpu_device *adev = (struct amdgpu_device *)handle;
428c1dc356aSLeo Liu struct amdgpu_ring *ring;
42920acbed4SEmily Deng
430c1dc356aSLeo Liu unsigned size;
431c1dc356aSLeo Liu int r, i;
432c1dc356aSLeo Liu
4333760f76cSOak Zeng r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
434c1dc356aSLeo Liu if (r)
435c1dc356aSLeo Liu return r;
436c1dc356aSLeo Liu
4371cb4ca59SFrank Min size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
438c1dc356aSLeo Liu if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
439c1dc356aSLeo Liu size += VCE_V4_0_FW_SIZE;
440c1dc356aSLeo Liu
441c1dc356aSLeo Liu r = amdgpu_vce_sw_init(adev, size);
442c1dc356aSLeo Liu if (r)
443c1dc356aSLeo Liu return r;
444c1dc356aSLeo Liu
445c1dc356aSLeo Liu if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
446c1dc356aSLeo Liu const struct common_firmware_header *hdr;
447a107ebf6SLeo Liu unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
448a107ebf6SLeo Liu
449c9533d1bSMichel Dänzer adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
450a107ebf6SLeo Liu if (!adev->vce.saved_bo)
451a107ebf6SLeo Liu return -ENOMEM;
452a107ebf6SLeo Liu
453c1dc356aSLeo Liu hdr = (const struct common_firmware_header *)adev->vce.fw->data;
454c1dc356aSLeo Liu adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
455c1dc356aSLeo Liu adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
456c1dc356aSLeo Liu adev->firmware.fw_size +=
457c1dc356aSLeo Liu ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
458c1dc356aSLeo Liu DRM_INFO("PSP loading VCE firmware\n");
459a107ebf6SLeo Liu } else {
460c1dc356aSLeo Liu r = amdgpu_vce_resume(adev);
461c1dc356aSLeo Liu if (r)
462c1dc356aSLeo Liu return r;
463c1dc356aSLeo Liu }
464c1dc356aSLeo Liu
465c1dc356aSLeo Liu for (i = 0; i < adev->vce.num_rings; i++) {
466080e613cSSatyajit Sahu enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
467080e613cSSatyajit Sahu
468c1dc356aSLeo Liu ring = &adev->vce.ring[i];
469*f4caf584SHawking Zhang ring->vm_hub = AMDGPU_MMHUB0(0);
470c1dc356aSLeo Liu sprintf(ring->name, "vce%d", i);
471bae5b519SXiangliang Yu if (amdgpu_sriov_vf(adev)) {
472bae5b519SXiangliang Yu /* DOORBELL only works under SRIOV */
473bae5b519SXiangliang Yu ring->use_doorbell = true;
4744ed11d79SFrank Min
4754ed11d79SFrank Min /* currently only use the first encoding ring for sriov,
4764ed11d79SFrank Min * so set unused location for other unused rings.
4774ed11d79SFrank Min */
478bae5b519SXiangliang Yu if (i == 0)
4799564f192SOak Zeng ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
480bae5b519SXiangliang Yu else
4819564f192SOak Zeng ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
482bae5b519SXiangliang Yu }
4831c6d567bSNirmoy Das r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
484080e613cSSatyajit Sahu hw_prio, NULL);
485c1dc356aSLeo Liu if (r)
486c1dc356aSLeo Liu return r;
487c1dc356aSLeo Liu }
488c1dc356aSLeo Liu
48920acbed4SEmily Deng
49020acbed4SEmily Deng r = amdgpu_vce_entity_init(adev);
49120acbed4SEmily Deng if (r)
49220acbed4SEmily Deng return r;
49320acbed4SEmily Deng
4947006dde2SXiangliang Yu r = amdgpu_virt_alloc_mm_table(adev);
4957006dde2SXiangliang Yu if (r)
496f5dee228SXiangliang Yu return r;
497f5dee228SXiangliang Yu
498c1dc356aSLeo Liu return r;
499c1dc356aSLeo Liu }
500c1dc356aSLeo Liu
vce_v4_0_sw_fini(void * handle)501c1dc356aSLeo Liu static int vce_v4_0_sw_fini(void *handle)
502c1dc356aSLeo Liu {
503c1dc356aSLeo Liu int r;
504c1dc356aSLeo Liu struct amdgpu_device *adev = (struct amdgpu_device *)handle;
505c1dc356aSLeo Liu
506f5dee228SXiangliang Yu /* free MM table */
5077006dde2SXiangliang Yu amdgpu_virt_free_mm_table(adev);
508f5dee228SXiangliang Yu
509a107ebf6SLeo Liu if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
510c9533d1bSMichel Dänzer kvfree(adev->vce.saved_bo);
511a107ebf6SLeo Liu adev->vce.saved_bo = NULL;
512a107ebf6SLeo Liu }
513a107ebf6SLeo Liu
514c1dc356aSLeo Liu r = amdgpu_vce_suspend(adev);
515c1dc356aSLeo Liu if (r)
516c1dc356aSLeo Liu return r;
517c1dc356aSLeo Liu
51850237287SRex Zhu return amdgpu_vce_sw_fini(adev);
519c1dc356aSLeo Liu }
520c1dc356aSLeo Liu
vce_v4_0_hw_init(void * handle)521c1dc356aSLeo Liu static int vce_v4_0_hw_init(void *handle)
522c1dc356aSLeo Liu {
523c1dc356aSLeo Liu int r, i;
524c1dc356aSLeo Liu struct amdgpu_device *adev = (struct amdgpu_device *)handle;
525c1dc356aSLeo Liu
526bf4305feSXiangliang Yu if (amdgpu_sriov_vf(adev))
527bf4305feSXiangliang Yu r = vce_v4_0_sriov_start(adev);
528bf4305feSXiangliang Yu else
529c1dc356aSLeo Liu r = vce_v4_0_start(adev);
530c1dc356aSLeo Liu if (r)
531c1dc356aSLeo Liu return r;
532c1dc356aSLeo Liu
533c1dc356aSLeo Liu for (i = 0; i < adev->vce.num_rings; i++) {
534c66ed765SAndrey Grodzovsky r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
535c1dc356aSLeo Liu if (r)
536c1dc356aSLeo Liu return r;
537c1dc356aSLeo Liu }
538c1dc356aSLeo Liu
539c1dc356aSLeo Liu DRM_INFO("VCE initialized successfully.\n");
540c1dc356aSLeo Liu
541c1dc356aSLeo Liu return 0;
542c1dc356aSLeo Liu }
543c1dc356aSLeo Liu
vce_v4_0_hw_fini(void * handle)544c1dc356aSLeo Liu static int vce_v4_0_hw_fini(void *handle)
545c1dc356aSLeo Liu {
546c1dc356aSLeo Liu struct amdgpu_device *adev = (struct amdgpu_device *)handle;
547c1dc356aSLeo Liu
548859e4659SEvan Quan cancel_delayed_work_sync(&adev->vce.idle_work);
549859e4659SEvan Quan
5505dd696aeSTrigger Huang if (!amdgpu_sriov_vf(adev)) {
551c1dc356aSLeo Liu /* vce_v4_0_wait_for_idle(handle); */
552c1dc356aSLeo Liu vce_v4_0_stop(adev);
5535dd696aeSTrigger Huang } else {
5545dd696aeSTrigger Huang /* full access mode, so don't touch any VCE register */
5555dd696aeSTrigger Huang DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
5565dd696aeSTrigger Huang }
5575dd696aeSTrigger Huang
558c1dc356aSLeo Liu return 0;
559c1dc356aSLeo Liu }
560c1dc356aSLeo Liu
/*
 * vce_v4_0_suspend - IP-block suspend callback
 *
 * Saves the VCPU BO contents (PSP load path only), gates power and
 * clocks, stops the hardware and suspends the common VCE state.
 */
static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	/* only touch the BO while the device is not unplugged */
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			/* snapshot the VCPU BO into saved_bo for resume */
			memcpy_fromio(adev->vce.saved_bo, ptr, size);
		}
		drm_dev_exit(idx);
	}

	/*
	 * Proper cleanups before halting the HW engine:
	 * - cancel the delayed idle work
	 * - enable powergating
	 * - enable clockgating
	 * - disable dpm
	 *
	 * TODO: to align with the VCN implementation, move the
	 * jobs for clockgating/powergating/dpm setting to
	 * ->set_powergating_state().
	 */
	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->pm.dpm_enabled) {
		amdgpu_dpm_enable_vce(adev, false);
	} else {
		amdgpu_asic_set_vce_clocks(adev, 0, 0);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_PG_STATE_GATE);
		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_CG_STATE_GATE);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}
608c1dc356aSLeo Liu
vce_v4_0_resume(void * handle)609c1dc356aSLeo Liu static int vce_v4_0_resume(void *handle)
610c1dc356aSLeo Liu {
611c1dc356aSLeo Liu struct amdgpu_device *adev = (struct amdgpu_device *)handle;
612f89f8c6bSAndrey Grodzovsky int r, idx;
613c1dc356aSLeo Liu
614a107ebf6SLeo Liu if (adev->vce.vcpu_bo == NULL)
615a107ebf6SLeo Liu return -EINVAL;
616a107ebf6SLeo Liu
617a107ebf6SLeo Liu if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
618f89f8c6bSAndrey Grodzovsky
619c58a863bSGuchun Chen if (drm_dev_enter(adev_to_drm(adev), &idx)) {
620a107ebf6SLeo Liu unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
621a107ebf6SLeo Liu void *ptr = adev->vce.cpu_addr;
622a107ebf6SLeo Liu
623a107ebf6SLeo Liu memcpy_toio(ptr, adev->vce.saved_bo, size);
624f89f8c6bSAndrey Grodzovsky drm_dev_exit(idx);
625f89f8c6bSAndrey Grodzovsky }
626a107ebf6SLeo Liu } else {
627c1dc356aSLeo Liu r = amdgpu_vce_resume(adev);
628c1dc356aSLeo Liu if (r)
629c1dc356aSLeo Liu return r;
630a107ebf6SLeo Liu }
631c1dc356aSLeo Liu
63250237287SRex Zhu return vce_v4_0_hw_init(adev);
633c1dc356aSLeo Liu }
634c1dc356aSLeo Liu
/*
 * vce_v4_0_mc_resume - program the VCE memory-controller interface
 *
 * @adev: amdgpu device pointer
 *
 * Programs clock-gating defaults, the LMI (local memory interface)
 * control registers, and the three VCPU cache windows: window 0 holds
 * the firmware image, window 1 the stack and window 2 the data area
 * (sizes VCE_V4_0_FW_SIZE / _STACK_SIZE / _DATA_SIZE).  When the
 * firmware is loaded through PSP, window 0 points at the PSP TMR
 * address instead of the driver-allocated VCE BO.
 *
 * NOTE(review): the mask/value constants written below are taken as-is
 * from the original bring-up code; their bit-level meaning is not
 * derivable from this file — confirm against the VCE register specs
 * before changing any of them.
 */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	/* clock gating setup */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	/* LMI defaults: cache/swap/VM control */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		/* firmware window 0 points at the PSP TMR (48-bit MC address,
		 * split into a >>8 low part and a >>40 high byte) */
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
			adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		/* firmware window 0 points into the driver-allocated VCE BO */
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	/* window 1: stack.  With PSP loading the stack starts at offset 0
	 * of the VCE BO (firmware lives in the TMR instead). */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	/* window 2: data area, immediately after the stack */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	/* enable the system trap interrupt */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
691c1dc356aSLeo Liu
/*
 * vce_v4_0_set_clockgating_state - stub clockgating handler
 *
 * Intentionally a no-op that always succeeds; a valid callback is still
 * required by the IP-block framework (e.g. during driver unload).  A
 * full implementation exists below but is compiled out under #if 0.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload*/
	return 0;
}
698c1dc356aSLeo Liu
699c1dc356aSLeo Liu #if 0
/*
 * vce_v4_0_is_idle - check whether all non-harvested VCE instances are idle
 *
 * Builds a busy mask covering each VCE instance that is not harvested and
 * returns true when none of those busy bits are set in SRBM_STATUS2.
 *
 * NOTE(review): this function is compiled out (#if 0).  mmSRBM_STATUS2 /
 * SRBM_STATUS2__VCE*_BUSY_MASK look like pre-SOC15 register names — verify
 * against the SOC15 headers before re-enabling.
 */
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}
710c1dc356aSLeo Liu
/*
 * vce_v4_0_wait_for_idle - poll until VCE reports idle
 *
 * Re-checks vce_v4_0_is_idle() up to adev->usec_timeout times.
 * Returns 0 once idle, -ETIMEDOUT if the engine never goes idle.
 * (Compiled out via the surrounding #if 0.)
 */
static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}
722c1dc356aSLeo Liu
723c1dc356aSLeo Liu #define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
724c1dc356aSLeo Liu #define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
725c1dc356aSLeo Liu #define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
726c1dc356aSLeo Liu #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
727c1dc356aSLeo Liu VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
728c1dc356aSLeo Liu
729c1dc356aSLeo Liu static bool vce_v4_0_check_soft_reset(void *handle)
730c1dc356aSLeo Liu {
731c1dc356aSLeo Liu struct amdgpu_device *adev = (struct amdgpu_device *)handle;
732c1dc356aSLeo Liu u32 srbm_soft_reset = 0;
733c1dc356aSLeo Liu
734c1dc356aSLeo Liu /* According to VCE team , we should use VCE_STATUS instead
735c1dc356aSLeo Liu * SRBM_STATUS.VCE_BUSY bit for busy status checking.
736c1dc356aSLeo Liu * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
737c1dc356aSLeo Liu * instance's registers are accessed
738c1dc356aSLeo Liu * (0 for 1st instance, 10 for 2nd instance).
739c1dc356aSLeo Liu *
740c1dc356aSLeo Liu *VCE_STATUS
741c1dc356aSLeo Liu *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
742c1dc356aSLeo Liu *|----+----+-----------+----+----+----+----------+---------+----|
743c1dc356aSLeo Liu *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
744c1dc356aSLeo Liu *
745c1dc356aSLeo Liu * VCE team suggest use bit 3--bit 6 for busy status check
746c1dc356aSLeo Liu */
747c1dc356aSLeo Liu mutex_lock(&adev->grbm_idx_mutex);
748c1dc356aSLeo Liu WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
749c1dc356aSLeo Liu if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
750c1dc356aSLeo Liu srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
751c1dc356aSLeo Liu srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
752c1dc356aSLeo Liu }
753c1dc356aSLeo Liu WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
754c1dc356aSLeo Liu if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
755c1dc356aSLeo Liu srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
756c1dc356aSLeo Liu srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
757c1dc356aSLeo Liu }
758c1dc356aSLeo Liu WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
759c1dc356aSLeo Liu mutex_unlock(&adev->grbm_idx_mutex);
760c1dc356aSLeo Liu
761c1dc356aSLeo Liu if (srbm_soft_reset) {
762c1dc356aSLeo Liu adev->vce.srbm_soft_reset = srbm_soft_reset;
763c1dc356aSLeo Liu return true;
764c1dc356aSLeo Liu } else {
765c1dc356aSLeo Liu adev->vce.srbm_soft_reset = 0;
766c1dc356aSLeo Liu return false;
767c1dc356aSLeo Liu }
768c1dc356aSLeo Liu }
769c1dc356aSLeo Liu
/*
 * vce_v4_0_soft_reset - issue the SRBM soft reset of the VCE blocks
 *
 * Uses the reset mask cached by vce_v4_0_check_soft_reset(): asserts the
 * soft-reset bits in SRBM_SOFT_RESET, waits ~50us, de-asserts them and
 * waits again for the block to settle.  No-op when no reset is pending.
 * (Compiled out via the surrounding #if 0; mmSRBM_SOFT_RESET appears to
 * be a pre-SOC15 register name — verify before re-enabling.)
 */
static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		/* read back — presumably to post the write; confirm */
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* de-assert the reset bits */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}
800c1dc356aSLeo Liu
/*
 * vce_v4_0_pre_soft_reset - quiesce VCE before a soft reset
 *
 * No-op unless check_soft_reset flagged a pending reset; otherwise waits
 * briefly and suspends the block so the reset hits an idle engine.
 * (Compiled out via the surrounding #if 0.)
 */
static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}
812c1dc356aSLeo Liu
813c1dc356aSLeo Liu
/*
 * vce_v4_0_post_soft_reset - bring VCE back up after a soft reset
 *
 * Counterpart of vce_v4_0_pre_soft_reset(): no-op unless a reset was
 * pending, otherwise waits briefly and resumes the block.
 * (Compiled out via the surrounding #if 0.)
 */
static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}
825c1dc356aSLeo Liu
826c1dc356aSLeo Liu static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
827c1dc356aSLeo Liu {
828c1dc356aSLeo Liu u32 tmp, data;
829c1dc356aSLeo Liu
830c1dc356aSLeo Liu tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
831c1dc356aSLeo Liu if (override)
832c1dc356aSLeo Liu data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
833c1dc356aSLeo Liu else
834c1dc356aSLeo Liu data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
835c1dc356aSLeo Liu
836c1dc356aSLeo Liu if (tmp != data)
837c1dc356aSLeo Liu WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
838c1dc356aSLeo Liu }
839c1dc356aSLeo Liu
/*
 * vce_v4_0_set_vce_sw_clock_gating - toggle firmware-controlled VCE MGCG
 *
 * @adev:  amdgpu device pointer
 * @gated: true to put the clocks in the gated (firmware-throttled) state
 *
 * Programs the CLOCK_GATING / UENC gating registers for the selected
 * state, with the CGTT override held asserted for the duration of the
 * update and released at the end.  (Compiled out via the surrounding
 * #if 0.)
 *
 * NOTE(review): the raw hex masks below are carried over from bring-up
 * code; their bit meanings are not derivable from this file — check the
 * register spec before modifying.
 */
static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		/* keep the DMA clocks forced on while gated */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		/* release the forced-on DMA clocks */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}
906c1dc356aSLeo Liu
907c1dc356aSLeo Liu static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
908c1dc356aSLeo Liu {
909c1dc356aSLeo Liu u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
910c1dc356aSLeo Liu
911c1dc356aSLeo Liu if (enable)
912c1dc356aSLeo Liu tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
913c1dc356aSLeo Liu else
914c1dc356aSLeo Liu tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
915c1dc356aSLeo Liu
916c1dc356aSLeo Liu WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
917c1dc356aSLeo Liu }
918c1dc356aSLeo Liu
919c1dc356aSLeo Liu static int vce_v4_0_set_clockgating_state(void *handle,
920c1dc356aSLeo Liu enum amd_clockgating_state state)
921c1dc356aSLeo Liu {
922c1dc356aSLeo Liu struct amdgpu_device *adev = (struct amdgpu_device *)handle;
923a9d4fe2fSNirmoy Das bool enable = (state == AMD_CG_STATE_GATE);
924c1dc356aSLeo Liu int i;
925c1dc356aSLeo Liu
926c1dc356aSLeo Liu if ((adev->asic_type == CHIP_POLARIS10) ||
927c1dc356aSLeo Liu (adev->asic_type == CHIP_TONGA) ||
928c1dc356aSLeo Liu (adev->asic_type == CHIP_FIJI))
929c1dc356aSLeo Liu vce_v4_0_set_bypass_mode(adev, enable);
930c1dc356aSLeo Liu
931c1dc356aSLeo Liu if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
932c1dc356aSLeo Liu return 0;
933c1dc356aSLeo Liu
934c1dc356aSLeo Liu mutex_lock(&adev->grbm_idx_mutex);
935c1dc356aSLeo Liu for (i = 0; i < 2; i++) {
936c1dc356aSLeo Liu /* Program VCE Instance 0 or 1 if not harvested */
937c1dc356aSLeo Liu if (adev->vce.harvest_config & (1 << i))
938c1dc356aSLeo Liu continue;
939c1dc356aSLeo Liu
940c1dc356aSLeo Liu WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
941c1dc356aSLeo Liu
942c1dc356aSLeo Liu if (enable) {
943c1dc356aSLeo Liu /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
944c1dc356aSLeo Liu uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
945c1dc356aSLeo Liu data &= ~(0xf | 0xff0);
946c1dc356aSLeo Liu data |= ((0x0 << 0) | (0x04 << 4));
947c1dc356aSLeo Liu WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
948c1dc356aSLeo Liu
949c1dc356aSLeo Liu /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
950c1dc356aSLeo Liu data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
951c1dc356aSLeo Liu data &= ~(0xf | 0xff0);
952c1dc356aSLeo Liu data |= ((0x0 << 0) | (0x04 << 4));
953c1dc356aSLeo Liu WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
954c1dc356aSLeo Liu }
955c1dc356aSLeo Liu
956c1dc356aSLeo Liu vce_v4_0_set_vce_sw_clock_gating(adev, enable);
957c1dc356aSLeo Liu }
958c1dc356aSLeo Liu
959c1dc356aSLeo Liu WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
960c1dc356aSLeo Liu mutex_unlock(&adev->grbm_idx_mutex);
961c1dc356aSLeo Liu
962c1dc356aSLeo Liu return 0;
963c1dc356aSLeo Liu }
9642c7e7835SEvan Quan #endif
965c1dc356aSLeo Liu
vce_v4_0_set_powergating_state(void * handle,enum amd_powergating_state state)966c1dc356aSLeo Liu static int vce_v4_0_set_powergating_state(void *handle,
967c1dc356aSLeo Liu enum amd_powergating_state state)
968c1dc356aSLeo Liu {
969c1dc356aSLeo Liu /* This doesn't actually powergate the VCE block.
970c1dc356aSLeo Liu * That's done in the dpm code via the SMC. This
971c1dc356aSLeo Liu * just re-inits the block as necessary. The actual
972c1dc356aSLeo Liu * gating still happens in the dpm code. We should
973c1dc356aSLeo Liu * revisit this when there is a cleaner line between
974c1dc356aSLeo Liu * the smc and the hw blocks
975c1dc356aSLeo Liu */
976c1dc356aSLeo Liu struct amdgpu_device *adev = (struct amdgpu_device *)handle;
977c1dc356aSLeo Liu
978c1dc356aSLeo Liu if (state == AMD_PG_STATE_GATE)
9792c7e7835SEvan Quan return vce_v4_0_stop(adev);
980c1dc356aSLeo Liu else
981c1dc356aSLeo Liu return vce_v4_0_start(adev);
982c1dc356aSLeo Liu }
983c1dc356aSLeo Liu
/*
 * vce_v4_0_ring_emit_ib - emit an indirect buffer on the VCE ring
 *
 * @ring:  the VCE ring
 * @job:   job owning the IB (provides the VMID)
 * @ib:    indirect buffer to execute
 * @flags: unused by this emitter
 *
 * Emits VCE_CMD_IB_VM followed by the VMID, the 64-bit IB GPU address
 * (low then high dword) and the IB length in dwords.  Five dwords total
 * (must match .emit_ib_size in the ring funcs).
 */
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}
995c1dc356aSLeo Liu
/*
 * vce_v4_0_ring_emit_fence - emit a fence write followed by a trap
 *
 * @ring:  the VCE ring
 * @addr:  GPU address to write the sequence number to
 * @seq:   sequence value (only the low 32 bits are emitted)
 * @flags: fence flags; 64-bit fences are not supported, hence the WARN_ON
 *
 * Emits VCE_CMD_FENCE (address low/high + seq) and then VCE_CMD_TRAP to
 * raise the completion interrupt.
 */
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}
1007c1dc356aSLeo Liu
/* Emit the single VCE_CMD_END dword that terminates a command stream. */
static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}
1012c1dc356aSLeo Liu
/*
 * vce_v4_0_emit_reg_wait - emit a "wait until (reg & mask) == val" packet
 *
 * @reg: dword register offset (shifted left by 2 to form a byte offset)
 *
 * Four dwords: VCE_CMD_REG_WAIT, byte offset, mask, expected value.
 */
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}
1021d949315aSChristian König
/*
 * vce_v4_0_emit_vm_flush - emit a GPU TLB flush for @vmid on the VCE ring
 *
 * Emits the common GMC flush sequence, then waits until the VMID's
 * context page-table base register (low dword) reflects the new page
 * directory address, i.e. the register writes have landed.
 */
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
			       vmid * hub->ctx_addr_distance,
			       lower_32_bits(pd_addr), 0xffffffff);
}
1034c1dc356aSLeo Liu
/*
 * vce_v4_0_emit_wreg - emit a register write packet
 *
 * Three dwords: VCE_CMD_REG_WRITE, byte offset (@reg << 2), value.
 */
static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}
1042cf912c8fSChristian König
vce_v4_0_set_interrupt_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned type,enum amdgpu_interrupt_state state)1043c1dc356aSLeo Liu static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1044c1dc356aSLeo Liu struct amdgpu_irq_src *source,
1045c1dc356aSLeo Liu unsigned type,
1046c1dc356aSLeo Liu enum amdgpu_interrupt_state state)
1047c1dc356aSLeo Liu {
1048c1dc356aSLeo Liu uint32_t val = 0;
1049c1dc356aSLeo Liu
1050088c69aaSFrank Min if (!amdgpu_sriov_vf(adev)) {
1051c1dc356aSLeo Liu if (state == AMDGPU_IRQ_STATE_ENABLE)
1052c1dc356aSLeo Liu val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1053c1dc356aSLeo Liu
1054c1dc356aSLeo Liu WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1055c1dc356aSLeo Liu ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1056088c69aaSFrank Min }
1057c1dc356aSLeo Liu return 0;
1058c1dc356aSLeo Liu }
1059c1dc356aSLeo Liu
vce_v4_0_process_interrupt(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)1060c1dc356aSLeo Liu static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1061c1dc356aSLeo Liu struct amdgpu_irq_src *source,
1062c1dc356aSLeo Liu struct amdgpu_iv_entry *entry)
1063c1dc356aSLeo Liu {
1064c1dc356aSLeo Liu DRM_DEBUG("IH: VCE\n");
1065c1dc356aSLeo Liu
1066c1dc356aSLeo Liu switch (entry->src_data[0]) {
1067c1dc356aSLeo Liu case 0:
1068c1dc356aSLeo Liu case 1:
1069c1dc356aSLeo Liu case 2:
1070c1dc356aSLeo Liu amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1071c1dc356aSLeo Liu break;
1072c1dc356aSLeo Liu default:
1073c1dc356aSLeo Liu DRM_ERROR("Unhandled interrupt: %d %d\n",
1074c1dc356aSLeo Liu entry->src_id, entry->src_data[0]);
1075c1dc356aSLeo Liu break;
1076c1dc356aSLeo Liu }
1077c1dc356aSLeo Liu
1078c1dc356aSLeo Liu return 0;
1079c1dc356aSLeo Liu }
1080c1dc356aSLeo Liu
/*
 * IP block callbacks for VCE 4.0.  The idle/soft-reset callbacks are
 * NULL because their implementations above are compiled out (#if 0).
 */
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};
1100c1dc356aSLeo Liu
/* Ring callbacks shared by all VCE 4.0 rings (VM mode only). */
static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	/* worst-case dword count for one frame, per the emitters below */
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
1132c1dc356aSLeo Liu
vce_v4_0_set_ring_funcs(struct amdgpu_device * adev)1133c1dc356aSLeo Liu static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1134c1dc356aSLeo Liu {
1135c1dc356aSLeo Liu int i;
1136c1dc356aSLeo Liu
11375d4af988SAlex Deucher for (i = 0; i < adev->vce.num_rings; i++) {
1138c1dc356aSLeo Liu adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
11395d4af988SAlex Deucher adev->vce.ring[i].me = i;
11405d4af988SAlex Deucher }
1141c1dc356aSLeo Liu DRM_INFO("VCE enabled in VM mode\n");
1142c1dc356aSLeo Liu }
1143c1dc356aSLeo Liu
/* Interrupt source callbacks for the VCE trap interrupt. */
static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};
1148c1dc356aSLeo Liu
vce_v4_0_set_irq_funcs(struct amdgpu_device * adev)1149c1dc356aSLeo Liu static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1150c1dc356aSLeo Liu {
1151c1dc356aSLeo Liu adev->vce.irq.num_types = 1;
1152c1dc356aSLeo Liu adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1153c1dc356aSLeo Liu };
1154c1dc356aSLeo Liu
/* IP block registration entry for VCE 4.0. */
const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};
1163