xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision e6b9d8eddb1772d99a676a906d42865293934edd)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drm_drv.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "soc15_common.h"
35 #include "mmsch_v1_0.h"
36 
37 #include "vce/vce_4_0_offset.h"
38 #include "vce/vce_4_0_default.h"
39 #include "vce/vce_4_0_sh_mask.h"
40 #include "mmhub/mmhub_1_0_offset.h"
41 #include "mmhub/mmhub_1_0_sh_mask.h"
42 
43 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
44 
/* Bit in VCE_STATUS that vce_v4_0_firmware_loaded() polls for. */
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

/*
 * Sizes, in bytes, of the three VCPU cache regions programmed by
 * vce_v4_0_mc_resume(): firmware image (384 KiB), stack (64 KiB) and
 * data (16 KiB per VCE handle plus a fixed 52 KiB).
 */
#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

/* Forward declarations of static helpers that are called before their definitions. */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54 
55 /**
56  * vce_v4_0_ring_get_rptr - get read pointer
57  *
58  * @ring: amdgpu_ring pointer
59  *
60  * Returns the current hardware read pointer
61  */
62 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
63 {
64 	struct amdgpu_device *adev = ring->adev;
65 
66 	if (ring->me == 0)
67 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
68 	else if (ring->me == 1)
69 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
70 	else
71 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
72 }
73 
74 /**
75  * vce_v4_0_ring_get_wptr - get write pointer
76  *
77  * @ring: amdgpu_ring pointer
78  *
79  * Returns the current hardware write pointer
80  */
81 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
82 {
83 	struct amdgpu_device *adev = ring->adev;
84 
85 	if (ring->use_doorbell)
86 		return *ring->wptr_cpu_addr;
87 
88 	if (ring->me == 0)
89 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
90 	else if (ring->me == 1)
91 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
92 	else
93 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
94 }
95 
96 /**
97  * vce_v4_0_ring_set_wptr - set write pointer
98  *
99  * @ring: amdgpu_ring pointer
100  *
101  * Commits the write pointer to the hardware
102  */
103 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
104 {
105 	struct amdgpu_device *adev = ring->adev;
106 
107 	if (ring->use_doorbell) {
108 		/* XXX check if swapping is necessary on BE */
109 		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
110 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
111 		return;
112 	}
113 
114 	if (ring->me == 0)
115 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
116 			lower_32_bits(ring->wptr));
117 	else if (ring->me == 1)
118 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
119 			lower_32_bits(ring->wptr));
120 	else
121 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
122 			lower_32_bits(ring->wptr));
123 }
124 
125 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
126 {
127 	int i, j;
128 
129 	for (i = 0; i < 10; ++i) {
130 		for (j = 0; j < 100; ++j) {
131 			uint32_t status =
132 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
133 
134 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
135 				return 0;
136 			mdelay(10);
137 		}
138 
139 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
140 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
141 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
142 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 		mdelay(10);
144 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
145 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
146 		mdelay(10);
147 
148 	}
149 
150 	return -ETIMEDOUT;
151 }
152 
153 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
154 				struct amdgpu_mm_table *table)
155 {
156 	uint32_t data = 0, loop;
157 	uint64_t addr = table->gpu_addr;
158 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
159 	uint32_t size;
160 
161 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
162 
163 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
164 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
165 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
166 
167 	/* 2, update vmid of descriptor */
168 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
169 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
170 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
171 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
172 
173 	/* 3, notify mmsch about the size of this descriptor */
174 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
175 
176 	/* 4, set resp to zero */
177 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
178 
179 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
180 	*adev->vce.ring[0].wptr_cpu_addr = 0;
181 	adev->vce.ring[0].wptr = 0;
182 	adev->vce.ring[0].wptr_old = 0;
183 
184 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
185 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
186 
187 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
188 	loop = 1000;
189 	while ((data & 0x10000002) != 0x10000002) {
190 		udelay(10);
191 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
192 		loop--;
193 		if (!loop)
194 			break;
195 	}
196 
197 	if (!loop) {
198 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
199 		return -EBUSY;
200 	}
201 
202 	return 0;
203 }
204 
/**
 * vce_v4_0_sriov_start - build the MMSCH init table and start VCE under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * If the shared MM table header has no VCE section yet (VCE offset and
 * size are both zero), a VCE section is appended after the header, or
 * after the UVD section when one is present. The section is a list of
 * direct-write, read-modify-write and poll commands covering the ring 0
 * setup, the memory controller programming and the firmware start
 * handshake, terminated by an END command. Sizes and offsets in the
 * header are counted in 32-bit words. The table is then submitted
 * through vce_v4_0_mmsch_start().
 *
 * NOTE(review): the MMSCH_V1_0_INSERT_* macros are defined outside this
 * file; they appear to rely on the locals direct_wt, direct_rd_mod_wt,
 * direct_poll, init_table and table_size declared here and to advance
 * init_table/table_size as commands are added - confirm in mmsch_v1_0.h
 * before renaming anything.
 *
 * Returns the result of vce_v4_0_mmsch_start().
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	/* tag each command template with its command type */
	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* only build the VCE section once; an existing one is reused as is */
	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		/* header size in 32-bit words */
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* place the VCE section right after the header or after the UVD section */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* ring 0 base address and size (size in 32-bit words) */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGINNING OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			/* PSP load: cache region 0 is based at the TMR address with offset 0 */
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			/* otherwise cache region 0 is based at the VCE buffer at the firmware offset */
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		/* cache regions 1 and 2 are always based at the VCE buffer */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		/* region 0: firmware */
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* region 1: stack; with PSP load the firmware is not in the buffer, so start at 0 */
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		/* region 2: data, directly after the stack */
		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* have the scheduler wait for the firmware-loaded bit in VCE_STATUS */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}
328 
329 /**
330  * vce_v4_0_start - start VCE block
331  *
332  * @adev: amdgpu_device pointer
333  *
334  * Setup and start the VCE block
335  */
336 static int vce_v4_0_start(struct amdgpu_device *adev)
337 {
338 	struct amdgpu_ring *ring;
339 	int r;
340 
341 	ring = &adev->vce.ring[0];
342 
343 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
344 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
345 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
346 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
347 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
348 
349 	ring = &adev->vce.ring[1];
350 
351 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
352 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
353 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
354 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
355 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
356 
357 	ring = &adev->vce.ring[2];
358 
359 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
360 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
361 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
362 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
363 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
364 
365 	vce_v4_0_mc_resume(adev);
366 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
367 			~VCE_STATUS__JOB_BUSY_MASK);
368 
369 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
370 
371 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
372 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
373 	mdelay(100);
374 
375 	r = vce_v4_0_firmware_loaded(adev);
376 
377 	/* clear BUSY flag */
378 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
379 
380 	if (r) {
381 		DRM_ERROR("VCE not responding, giving up!!!\n");
382 		return r;
383 	}
384 
385 	return 0;
386 }
387 
388 static int vce_v4_0_stop(struct amdgpu_device *adev)
389 {
390 
391 	/* Disable VCPU */
392 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
393 
394 	/* hold on ECPU */
395 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
396 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
397 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
398 
399 	/* clear VCE_STATUS */
400 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
401 
402 	/* Set Clock-Gating off */
403 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
404 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
405 	*/
406 
407 	return 0;
408 }
409 
410 static int vce_v4_0_early_init(void *handle)
411 {
412 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
413 
414 	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
415 		adev->vce.num_rings = 1;
416 	else
417 		adev->vce.num_rings = 3;
418 
419 	vce_v4_0_set_ring_funcs(adev);
420 	vce_v4_0_set_irq_funcs(adev);
421 
422 	return 0;
423 }
424 
425 static int vce_v4_0_sw_init(void *handle)
426 {
427 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
428 	struct amdgpu_ring *ring;
429 
430 	unsigned size;
431 	int r, i;
432 
433 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
434 	if (r)
435 		return r;
436 
437 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
438 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
439 		size += VCE_V4_0_FW_SIZE;
440 
441 	r = amdgpu_vce_sw_init(adev, size);
442 	if (r)
443 		return r;
444 
445 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
446 		const struct common_firmware_header *hdr;
447 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
448 
449 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
450 		if (!adev->vce.saved_bo)
451 			return -ENOMEM;
452 
453 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
454 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
455 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
456 		adev->firmware.fw_size +=
457 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
458 		DRM_INFO("PSP loading VCE firmware\n");
459 	} else {
460 		r = amdgpu_vce_resume(adev);
461 		if (r)
462 			return r;
463 	}
464 
465 	for (i = 0; i < adev->vce.num_rings; i++) {
466 		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
467 
468 		ring = &adev->vce.ring[i];
469 		ring->vm_hub = AMDGPU_MMHUB_0;
470 		sprintf(ring->name, "vce%d", i);
471 		if (amdgpu_sriov_vf(adev)) {
472 			/* DOORBELL only works under SRIOV */
473 			ring->use_doorbell = true;
474 
475 			/* currently only use the first encoding ring for sriov,
476 			 * so set unused location for other unused rings.
477 			 */
478 			if (i == 0)
479 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
480 			else
481 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
482 		}
483 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
484 				     hw_prio, NULL);
485 		if (r)
486 			return r;
487 	}
488 
489 
490 	r = amdgpu_vce_entity_init(adev);
491 	if (r)
492 		return r;
493 
494 	r = amdgpu_virt_alloc_mm_table(adev);
495 	if (r)
496 		return r;
497 
498 	return r;
499 }
500 
501 static int vce_v4_0_sw_fini(void *handle)
502 {
503 	int r;
504 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
505 
506 	/* free MM table */
507 	amdgpu_virt_free_mm_table(adev);
508 
509 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
510 		kvfree(adev->vce.saved_bo);
511 		adev->vce.saved_bo = NULL;
512 	}
513 
514 	r = amdgpu_vce_suspend(adev);
515 	if (r)
516 		return r;
517 
518 	return amdgpu_vce_sw_fini(adev);
519 }
520 
521 static int vce_v4_0_hw_init(void *handle)
522 {
523 	int r, i;
524 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
525 
526 	if (amdgpu_sriov_vf(adev))
527 		r = vce_v4_0_sriov_start(adev);
528 	else
529 		r = vce_v4_0_start(adev);
530 	if (r)
531 		return r;
532 
533 	for (i = 0; i < adev->vce.num_rings; i++) {
534 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
535 		if (r)
536 			return r;
537 	}
538 
539 	DRM_INFO("VCE initialized successfully.\n");
540 
541 	return 0;
542 }
543 
544 static int vce_v4_0_hw_fini(void *handle)
545 {
546 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
547 
548 	cancel_delayed_work_sync(&adev->vce.idle_work);
549 
550 	if (!amdgpu_sriov_vf(adev)) {
551 		/* vce_v4_0_wait_for_idle(handle); */
552 		vce_v4_0_stop(adev);
553 	} else {
554 		/* full access mode, so don't touch any VCE register */
555 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
556 	}
557 
558 	return 0;
559 }
560 
561 static int vce_v4_0_suspend(void *handle)
562 {
563 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
564 	int r, idx;
565 
566 	if (adev->vce.vcpu_bo == NULL)
567 		return 0;
568 
569 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
570 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
571 			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
572 			void *ptr = adev->vce.cpu_addr;
573 
574 			memcpy_fromio(adev->vce.saved_bo, ptr, size);
575 		}
576 		drm_dev_exit(idx);
577 	}
578 
579 	/*
580 	 * Proper cleanups before halting the HW engine:
581 	 *   - cancel the delayed idle work
582 	 *   - enable powergating
583 	 *   - enable clockgating
584 	 *   - disable dpm
585 	 *
586 	 * TODO: to align with the VCN implementation, move the
587 	 * jobs for clockgating/powergating/dpm setting to
588 	 * ->set_powergating_state().
589 	 */
590 	cancel_delayed_work_sync(&adev->vce.idle_work);
591 
592 	if (adev->pm.dpm_enabled) {
593 		amdgpu_dpm_enable_vce(adev, false);
594 	} else {
595 		amdgpu_asic_set_vce_clocks(adev, 0, 0);
596 		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
597 						       AMD_PG_STATE_GATE);
598 		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
599 						       AMD_CG_STATE_GATE);
600 	}
601 
602 	r = vce_v4_0_hw_fini(adev);
603 	if (r)
604 		return r;
605 
606 	return amdgpu_vce_suspend(adev);
607 }
608 
609 static int vce_v4_0_resume(void *handle)
610 {
611 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
612 	int r, idx;
613 
614 	if (adev->vce.vcpu_bo == NULL)
615 		return -EINVAL;
616 
617 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
618 
619 		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
620 			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
621 			void *ptr = adev->vce.cpu_addr;
622 
623 			memcpy_toio(ptr, adev->vce.saved_bo, size);
624 			drm_dev_exit(idx);
625 		}
626 	} else {
627 		r = amdgpu_vce_resume(adev);
628 		if (r)
629 			return r;
630 	}
631 
632 	return vce_v4_0_hw_init(adev);
633 }
634 
635 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
636 {
637 	uint32_t offset, size;
638 	uint64_t tmr_mc_addr;
639 
640 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
641 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
642 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
643 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
644 
645 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
646 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
647 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
648 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
649 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
650 
651 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
652 
653 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
654 		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
655 										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
656 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
657 			(tmr_mc_addr >> 8));
658 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
659 			(tmr_mc_addr >> 40) & 0xff);
660 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
661 	} else {
662 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
663 			(adev->vce.gpu_addr >> 8));
664 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
665 			(adev->vce.gpu_addr >> 40) & 0xff);
666 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
667 	}
668 
669 	size = VCE_V4_0_FW_SIZE;
670 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
671 
672 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
673 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
674 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
675 	size = VCE_V4_0_STACK_SIZE;
676 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
677 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
678 
679 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
680 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
681 	offset += size;
682 	size = VCE_V4_0_DATA_SIZE;
683 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
684 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
685 
686 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
687 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
688 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
689 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
690 }
691 
692 static int vce_v4_0_set_clockgating_state(void *handle,
693 					  enum amd_clockgating_state state)
694 {
695 	/* needed for driver unload*/
696 	return 0;
697 }
698 
699 #if 0
700 static bool vce_v4_0_is_idle(void *handle)
701 {
702 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
703 	u32 mask = 0;
704 
705 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
706 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
707 
708 	return !(RREG32(mmSRBM_STATUS2) & mask);
709 }
710 
711 static int vce_v4_0_wait_for_idle(void *handle)
712 {
713 	unsigned i;
714 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
715 
716 	for (i = 0; i < adev->usec_timeout; i++)
717 		if (vce_v4_0_is_idle(handle))
718 			return 0;
719 
720 	return -ETIMEDOUT;
721 }
722 
/*
 * VCE_STATUS busy bits used by vce_v4_0_check_soft_reset().
 *
 * NOTE(review): 0x08 is bit 3, which the VCE_STATUS table in
 * vce_v4_0_check_soft_reset() labels RB2 (it places AUTO ACTIVE at
 * bit 6) - confirm which is right. RB1_BUSY is defined but is not part
 * of AMDGPU_VCE_STATUS_BUSY_MASK.
 */
#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
728 
729 static bool vce_v4_0_check_soft_reset(void *handle)
730 {
731 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
732 	u32 srbm_soft_reset = 0;
733 
734 	/* According to VCE team , we should use VCE_STATUS instead
735 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
736 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
737 	 * instance's registers are accessed
738 	 * (0 for 1st instance, 10 for 2nd instance).
739 	 *
740 	 *VCE_STATUS
741 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
742 	 *|----+----+-----------+----+----+----+----------+---------+----|
743 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
744 	 *
745 	 * VCE team suggest use bit 3--bit 6 for busy status check
746 	 */
747 	mutex_lock(&adev->grbm_idx_mutex);
748 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
749 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
750 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
751 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
752 	}
753 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
754 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
755 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
756 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
757 	}
758 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
759 	mutex_unlock(&adev->grbm_idx_mutex);
760 
761 	if (srbm_soft_reset) {
762 		adev->vce.srbm_soft_reset = srbm_soft_reset;
763 		return true;
764 	} else {
765 		adev->vce.srbm_soft_reset = 0;
766 		return false;
767 	}
768 }
769 
770 static int vce_v4_0_soft_reset(void *handle)
771 {
772 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
773 	u32 srbm_soft_reset;
774 
775 	if (!adev->vce.srbm_soft_reset)
776 		return 0;
777 	srbm_soft_reset = adev->vce.srbm_soft_reset;
778 
779 	if (srbm_soft_reset) {
780 		u32 tmp;
781 
782 		tmp = RREG32(mmSRBM_SOFT_RESET);
783 		tmp |= srbm_soft_reset;
784 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
785 		WREG32(mmSRBM_SOFT_RESET, tmp);
786 		tmp = RREG32(mmSRBM_SOFT_RESET);
787 
788 		udelay(50);
789 
790 		tmp &= ~srbm_soft_reset;
791 		WREG32(mmSRBM_SOFT_RESET, tmp);
792 		tmp = RREG32(mmSRBM_SOFT_RESET);
793 
794 		/* Wait a little for things to settle down */
795 		udelay(50);
796 	}
797 
798 	return 0;
799 }
800 
801 static int vce_v4_0_pre_soft_reset(void *handle)
802 {
803 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
804 
805 	if (!adev->vce.srbm_soft_reset)
806 		return 0;
807 
808 	mdelay(5);
809 
810 	return vce_v4_0_suspend(adev);
811 }
812 
813 
814 static int vce_v4_0_post_soft_reset(void *handle)
815 {
816 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
817 
818 	if (!adev->vce.srbm_soft_reset)
819 		return 0;
820 
821 	mdelay(5);
822 
823 	return vce_v4_0_resume(adev);
824 }
825 
826 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
827 {
828 	u32 tmp, data;
829 
830 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
831 	if (override)
832 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
833 	else
834 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
835 
836 	if (tmp != data)
837 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
838 }
839 
/*
 * vce_v4_0_set_vce_sw_clock_gating - program the software clock-gating registers
 *
 * @adev: amdgpu_device pointer
 * @gated: selects which of the two register value sets is applied
 *
 * The clock-gating override is switched on before the registers are
 * touched and switched off again afterwards. Each register is updated
 * with a read-modify-write. Currently compiled out (#if 0).
 */
static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		/* set the DMA clock force-on bits plus bit 3 */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		/* clear the DMA clock force-on bits plus bit 3 */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	/* drop the override again now that the registers are programmed */
	vce_v4_0_override_vce_clock_gating(adev, false);
}
906 
907 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
908 {
909 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
910 
911 	if (enable)
912 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
913 	else
914 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
915 
916 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
917 }
918 
919 static int vce_v4_0_set_clockgating_state(void *handle,
920 					  enum amd_clockgating_state state)
921 {
922 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
923 	bool enable = (state == AMD_CG_STATE_GATE);
924 	int i;
925 
926 	if ((adev->asic_type == CHIP_POLARIS10) ||
927 		(adev->asic_type == CHIP_TONGA) ||
928 		(adev->asic_type == CHIP_FIJI))
929 		vce_v4_0_set_bypass_mode(adev, enable);
930 
931 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
932 		return 0;
933 
934 	mutex_lock(&adev->grbm_idx_mutex);
935 	for (i = 0; i < 2; i++) {
936 		/* Program VCE Instance 0 or 1 if not harvested */
937 		if (adev->vce.harvest_config & (1 << i))
938 			continue;
939 
940 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
941 
942 		if (enable) {
943 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
944 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
945 			data &= ~(0xf | 0xff0);
946 			data |= ((0x0 << 0) | (0x04 << 4));
947 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
948 
949 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
950 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
951 			data &= ~(0xf | 0xff0);
952 			data |= ((0x0 << 0) | (0x04 << 4));
953 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
954 		}
955 
956 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
957 	}
958 
959 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
960 	mutex_unlock(&adev->grbm_idx_mutex);
961 
962 	return 0;
963 }
964 #endif
965 
966 static int vce_v4_0_set_powergating_state(void *handle,
967 					  enum amd_powergating_state state)
968 {
969 	/* This doesn't actually powergate the VCE block.
970 	 * That's done in the dpm code via the SMC.  This
971 	 * just re-inits the block as necessary.  The actual
972 	 * gating still happens in the dpm code.  We should
973 	 * revisit this when there is a cleaner line between
974 	 * the smc and the hw blocks
975 	 */
976 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
977 
978 	if (state == AMD_PG_STATE_GATE)
979 		return vce_v4_0_stop(adev);
980 	else
981 		return vce_v4_0_start(adev);
982 }
983 
984 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
985 					struct amdgpu_ib *ib, uint32_t flags)
986 {
987 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
988 
989 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
990 	amdgpu_ring_write(ring, vmid);
991 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
992 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
993 	amdgpu_ring_write(ring, ib->length_dw);
994 }
995 
996 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
997 			u64 seq, unsigned flags)
998 {
999 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
1000 
1001 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
1002 	amdgpu_ring_write(ring, addr);
1003 	amdgpu_ring_write(ring, upper_32_bits(addr));
1004 	amdgpu_ring_write(ring, seq);
1005 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
1006 }
1007 
1008 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
1009 {
1010 	amdgpu_ring_write(ring, VCE_CMD_END);
1011 }
1012 
1013 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1014 				   uint32_t val, uint32_t mask)
1015 {
1016 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
1017 	amdgpu_ring_write(ring,	reg << 2);
1018 	amdgpu_ring_write(ring, mask);
1019 	amdgpu_ring_write(ring, val);
1020 }
1021 
1022 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
1023 				   unsigned int vmid, uint64_t pd_addr)
1024 {
1025 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
1026 
1027 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1028 
1029 	/* wait for reg writes */
1030 	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
1031 			       vmid * hub->ctx_addr_distance,
1032 			       lower_32_bits(pd_addr), 0xffffffff);
1033 }
1034 
1035 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1036 			       uint32_t reg, uint32_t val)
1037 {
1038 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1039 	amdgpu_ring_write(ring,	reg << 2);
1040 	amdgpu_ring_write(ring, val);
1041 }
1042 
1043 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1044 					struct amdgpu_irq_src *source,
1045 					unsigned type,
1046 					enum amdgpu_interrupt_state state)
1047 {
1048 	uint32_t val = 0;
1049 
1050 	if (!amdgpu_sriov_vf(adev)) {
1051 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1052 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1053 
1054 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1055 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1056 	}
1057 	return 0;
1058 }
1059 
1060 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1061 				      struct amdgpu_irq_src *source,
1062 				      struct amdgpu_iv_entry *entry)
1063 {
1064 	DRM_DEBUG("IH: VCE\n");
1065 
1066 	switch (entry->src_data[0]) {
1067 	case 0:
1068 	case 1:
1069 	case 2:
1070 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1071 		break;
1072 	default:
1073 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1074 			  entry->src_id, entry->src_data[0]);
1075 		break;
1076 	}
1077 
1078 	return 0;
1079 }
1080 
1081 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1082 	.name = "vce_v4_0",
1083 	.early_init = vce_v4_0_early_init,
1084 	.late_init = NULL,
1085 	.sw_init = vce_v4_0_sw_init,
1086 	.sw_fini = vce_v4_0_sw_fini,
1087 	.hw_init = vce_v4_0_hw_init,
1088 	.hw_fini = vce_v4_0_hw_fini,
1089 	.suspend = vce_v4_0_suspend,
1090 	.resume = vce_v4_0_resume,
1091 	.is_idle = NULL /* vce_v4_0_is_idle */,
1092 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1093 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1094 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1095 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1096 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1097 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1098 	.set_powergating_state = vce_v4_0_set_powergating_state,
1099 };
1100 
1101 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1102 	.type = AMDGPU_RING_TYPE_VCE,
1103 	.align_mask = 0x3f,
1104 	.nop = VCE_CMD_NO_OP,
1105 	.support_64bit_ptrs = false,
1106 	.no_user_fence = true,
1107 	.get_rptr = vce_v4_0_ring_get_rptr,
1108 	.get_wptr = vce_v4_0_ring_get_wptr,
1109 	.set_wptr = vce_v4_0_ring_set_wptr,
1110 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1111 	.emit_frame_size =
1112 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1113 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1114 		4 + /* vce_v4_0_emit_vm_flush */
1115 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1116 		1, /* vce_v4_0_ring_insert_end */
1117 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1118 	.emit_ib = vce_v4_0_ring_emit_ib,
1119 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1120 	.emit_fence = vce_v4_0_ring_emit_fence,
1121 	.test_ring = amdgpu_vce_ring_test_ring,
1122 	.test_ib = amdgpu_vce_ring_test_ib,
1123 	.insert_nop = amdgpu_ring_insert_nop,
1124 	.insert_end = vce_v4_0_ring_insert_end,
1125 	.pad_ib = amdgpu_ring_generic_pad_ib,
1126 	.begin_use = amdgpu_vce_ring_begin_use,
1127 	.end_use = amdgpu_vce_ring_end_use,
1128 	.emit_wreg = vce_v4_0_emit_wreg,
1129 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
1130 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1131 };
1132 
1133 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1134 {
1135 	int i;
1136 
1137 	for (i = 0; i < adev->vce.num_rings; i++) {
1138 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1139 		adev->vce.ring[i].me = i;
1140 	}
1141 	DRM_INFO("VCE enabled in VM mode\n");
1142 }
1143 
1144 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1145 	.set = vce_v4_0_set_interrupt_state,
1146 	.process = vce_v4_0_process_interrupt,
1147 };
1148 
1149 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1150 {
1151 	adev->vce.irq.num_types = 1;
1152 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1153 };
1154 
1155 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1156 {
1157 	.type = AMD_IP_BLOCK_TYPE_VCE,
1158 	.major = 4,
1159 	.minor = 0,
1160 	.rev = 0,
1161 	.funcs = &vce_v4_0_ip_funcs,
1162 };
1163