xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision 19b438592238b3b40c3f945bb5f9c4ca971c0c45)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drm_drv.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "soc15_common.h"
35 #include "mmsch_v1_0.h"
36 
37 #include "vce/vce_4_0_offset.h"
38 #include "vce/vce_4_0_default.h"
39 #include "vce/vce_4_0_sh_mask.h"
40 #include "mmhub/mmhub_1_0_offset.h"
41 #include "mmhub/mmhub_1_0_sh_mask.h"
42 
43 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
44 
/* VCE_STATUS bit polled to detect that the VCPU firmware has loaded */
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

/* Sizes written to the VCE_VCPU_CACHE_SIZE0/1/2 registers, in that order */
#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

/* Forward declarations of helpers that are called before their definitions */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54 
55 /**
56  * vce_v4_0_ring_get_rptr - get read pointer
57  *
58  * @ring: amdgpu_ring pointer
59  *
60  * Returns the current hardware read pointer
61  */
62 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
63 {
64 	struct amdgpu_device *adev = ring->adev;
65 
66 	if (ring->me == 0)
67 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
68 	else if (ring->me == 1)
69 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
70 	else
71 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
72 }
73 
74 /**
75  * vce_v4_0_ring_get_wptr - get write pointer
76  *
77  * @ring: amdgpu_ring pointer
78  *
79  * Returns the current hardware write pointer
80  */
81 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
82 {
83 	struct amdgpu_device *adev = ring->adev;
84 
85 	if (ring->use_doorbell)
86 		return adev->wb.wb[ring->wptr_offs];
87 
88 	if (ring->me == 0)
89 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
90 	else if (ring->me == 1)
91 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
92 	else
93 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
94 }
95 
96 /**
97  * vce_v4_0_ring_set_wptr - set write pointer
98  *
99  * @ring: amdgpu_ring pointer
100  *
101  * Commits the write pointer to the hardware
102  */
103 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
104 {
105 	struct amdgpu_device *adev = ring->adev;
106 
107 	if (ring->use_doorbell) {
108 		/* XXX check if swapping is necessary on BE */
109 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
110 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
111 		return;
112 	}
113 
114 	if (ring->me == 0)
115 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
116 			lower_32_bits(ring->wptr));
117 	else if (ring->me == 1)
118 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
119 			lower_32_bits(ring->wptr));
120 	else
121 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
122 			lower_32_bits(ring->wptr));
123 }
124 
125 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
126 {
127 	int i, j;
128 
129 	for (i = 0; i < 10; ++i) {
130 		for (j = 0; j < 100; ++j) {
131 			uint32_t status =
132 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
133 
134 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
135 				return 0;
136 			mdelay(10);
137 		}
138 
139 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
140 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
141 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
142 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 		mdelay(10);
144 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
145 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
146 		mdelay(10);
147 
148 	}
149 
150 	return -ETIMEDOUT;
151 }
152 
153 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
154 				struct amdgpu_mm_table *table)
155 {
156 	uint32_t data = 0, loop;
157 	uint64_t addr = table->gpu_addr;
158 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
159 	uint32_t size;
160 
161 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
162 
163 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
164 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
165 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
166 
167 	/* 2, update vmid of descriptor */
168 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
169 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
170 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
171 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
172 
173 	/* 3, notify mmsch about the size of this descriptor */
174 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
175 
176 	/* 4, set resp to zero */
177 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
178 
179 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
180 	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
181 	adev->vce.ring[0].wptr = 0;
182 	adev->vce.ring[0].wptr_old = 0;
183 
184 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
185 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
186 
187 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
188 	loop = 1000;
189 	while ((data & 0x10000002) != 0x10000002) {
190 		udelay(10);
191 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
192 		loop--;
193 		if (!loop)
194 			break;
195 	}
196 
197 	if (!loop) {
198 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
199 		return -EBUSY;
200 	}
201 
202 	return 0;
203 }
204 
205 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
206 {
207 	struct amdgpu_ring *ring;
208 	uint32_t offset, size;
209 	uint32_t table_size = 0;
210 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
211 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
212 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
213 	struct mmsch_v1_0_cmd_end end = { { 0 } };
214 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
215 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
216 
217 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
218 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
219 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
220 	end.cmd_header.command_type = MMSCH_COMMAND__END;
221 
222 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
223 		header->version = MMSCH_VERSION;
224 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
225 
226 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
227 			header->vce_table_offset = header->header_size;
228 		else
229 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
230 
231 		init_table += header->vce_table_offset;
232 
233 		ring = &adev->vce.ring[0];
234 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
235 					    lower_32_bits(ring->gpu_addr));
236 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
237 					    upper_32_bits(ring->gpu_addr));
238 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
239 					    ring->ring_size / 4);
240 
241 		/* BEGING OF MC_RESUME */
242 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
243 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
244 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
245 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
246 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
247 
248 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
249 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
250 			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
251 			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
252 			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
253 
254 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
255 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
256 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
257 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
258 						(tmr_mc_addr >> 40) & 0xff);
259 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
260 		} else {
261 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
262 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
263 						adev->vce.gpu_addr >> 8);
264 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
265 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
266 						(adev->vce.gpu_addr >> 40) & 0xff);
267 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
268 						offset & ~0x0f000000);
269 
270 		}
271 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
272 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
273 						adev->vce.gpu_addr >> 8);
274 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
275 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
276 						(adev->vce.gpu_addr >> 40) & 0xff);
277 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
278 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
279 						adev->vce.gpu_addr >> 8);
280 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
281 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
282 						(adev->vce.gpu_addr >> 40) & 0xff);
283 
284 		size = VCE_V4_0_FW_SIZE;
285 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
286 
287 		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
288 		size = VCE_V4_0_STACK_SIZE;
289 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
290 					(offset & ~0x0f000000) | (1 << 24));
291 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
292 
293 		offset += size;
294 		size = VCE_V4_0_DATA_SIZE;
295 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
296 					(offset & ~0x0f000000) | (2 << 24));
297 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
298 
299 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
300 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
301 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
302 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
303 
304 		/* end of MC_RESUME */
305 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
306 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
307 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
308 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
309 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
310 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
311 
312 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
313 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
314 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
315 
316 		/* clear BUSY flag */
317 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
318 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
319 
320 		/* add end packet */
321 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
322 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
323 		header->vce_table_size = table_size;
324 	}
325 
326 	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
327 }
328 
329 /**
330  * vce_v4_0_start - start VCE block
331  *
332  * @adev: amdgpu_device pointer
333  *
334  * Setup and start the VCE block
335  */
336 static int vce_v4_0_start(struct amdgpu_device *adev)
337 {
338 	struct amdgpu_ring *ring;
339 	int r;
340 
341 	ring = &adev->vce.ring[0];
342 
343 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
344 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
345 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
346 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
347 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
348 
349 	ring = &adev->vce.ring[1];
350 
351 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
352 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
353 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
354 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
355 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
356 
357 	ring = &adev->vce.ring[2];
358 
359 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
360 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
361 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
362 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
363 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
364 
365 	vce_v4_0_mc_resume(adev);
366 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
367 			~VCE_STATUS__JOB_BUSY_MASK);
368 
369 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
370 
371 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
372 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
373 	mdelay(100);
374 
375 	r = vce_v4_0_firmware_loaded(adev);
376 
377 	/* clear BUSY flag */
378 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
379 
380 	if (r) {
381 		DRM_ERROR("VCE not responding, giving up!!!\n");
382 		return r;
383 	}
384 
385 	return 0;
386 }
387 
388 static int vce_v4_0_stop(struct amdgpu_device *adev)
389 {
390 
391 	/* Disable VCPU */
392 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
393 
394 	/* hold on ECPU */
395 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
396 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
397 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
398 
399 	/* clear VCE_STATUS */
400 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
401 
402 	/* Set Clock-Gating off */
403 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
404 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
405 	*/
406 
407 	return 0;
408 }
409 
410 static int vce_v4_0_early_init(void *handle)
411 {
412 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
413 
414 	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
415 		adev->vce.num_rings = 1;
416 	else
417 		adev->vce.num_rings = 3;
418 
419 	vce_v4_0_set_ring_funcs(adev);
420 	vce_v4_0_set_irq_funcs(adev);
421 
422 	return 0;
423 }
424 
425 static int vce_v4_0_sw_init(void *handle)
426 {
427 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
428 	struct amdgpu_ring *ring;
429 
430 	unsigned size;
431 	int r, i;
432 
433 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
434 	if (r)
435 		return r;
436 
437 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
438 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
439 		size += VCE_V4_0_FW_SIZE;
440 
441 	r = amdgpu_vce_sw_init(adev, size);
442 	if (r)
443 		return r;
444 
445 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
446 		const struct common_firmware_header *hdr;
447 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
448 
449 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
450 		if (!adev->vce.saved_bo)
451 			return -ENOMEM;
452 
453 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
454 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
455 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
456 		adev->firmware.fw_size +=
457 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
458 		DRM_INFO("PSP loading VCE firmware\n");
459 	} else {
460 		r = amdgpu_vce_resume(adev);
461 		if (r)
462 			return r;
463 	}
464 
465 	for (i = 0; i < adev->vce.num_rings; i++) {
466 		ring = &adev->vce.ring[i];
467 		sprintf(ring->name, "vce%d", i);
468 		if (amdgpu_sriov_vf(adev)) {
469 			/* DOORBELL only works under SRIOV */
470 			ring->use_doorbell = true;
471 
472 			/* currently only use the first encoding ring for sriov,
473 			 * so set unused location for other unused rings.
474 			 */
475 			if (i == 0)
476 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
477 			else
478 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
479 		}
480 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
481 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
482 		if (r)
483 			return r;
484 	}
485 
486 
487 	r = amdgpu_vce_entity_init(adev);
488 	if (r)
489 		return r;
490 
491 	r = amdgpu_virt_alloc_mm_table(adev);
492 	if (r)
493 		return r;
494 
495 	return r;
496 }
497 
498 static int vce_v4_0_sw_fini(void *handle)
499 {
500 	int r;
501 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
502 
503 	/* free MM table */
504 	amdgpu_virt_free_mm_table(adev);
505 
506 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
507 		kvfree(adev->vce.saved_bo);
508 		adev->vce.saved_bo = NULL;
509 	}
510 
511 	r = amdgpu_vce_suspend(adev);
512 	if (r)
513 		return r;
514 
515 	return amdgpu_vce_sw_fini(adev);
516 }
517 
518 static int vce_v4_0_hw_init(void *handle)
519 {
520 	int r, i;
521 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
522 
523 	if (amdgpu_sriov_vf(adev))
524 		r = vce_v4_0_sriov_start(adev);
525 	else
526 		r = vce_v4_0_start(adev);
527 	if (r)
528 		return r;
529 
530 	for (i = 0; i < adev->vce.num_rings; i++) {
531 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
532 		if (r)
533 			return r;
534 	}
535 
536 	DRM_INFO("VCE initialized successfully.\n");
537 
538 	return 0;
539 }
540 
/**
 * vce_v4_0_hw_fini - shut down the hardware
 *
 * @handle: amdgpu_device pointer
 *
 * Stops the block on bare metal.  For an SR-IOV virtual function no VCE
 * register is touched and only a debug message is emitted.
 *
 * Always returns 0.
 */
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	vce_v4_0_stop(adev);

	return 0;
}
555 
556 static int vce_v4_0_suspend(void *handle)
557 {
558 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
559 	int r, idx;
560 
561 	if (adev->vce.vcpu_bo == NULL)
562 		return 0;
563 
564 	if (drm_dev_enter(&adev->ddev, &idx)) {
565 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
566 			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
567 			void *ptr = adev->vce.cpu_addr;
568 
569 			memcpy_fromio(adev->vce.saved_bo, ptr, size);
570 		}
571 		drm_dev_exit(idx);
572 	}
573 
574 	r = vce_v4_0_hw_fini(adev);
575 	if (r)
576 		return r;
577 
578 	return amdgpu_vce_suspend(adev);
579 }
580 
581 static int vce_v4_0_resume(void *handle)
582 {
583 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
584 	int r, idx;
585 
586 	if (adev->vce.vcpu_bo == NULL)
587 		return -EINVAL;
588 
589 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
590 
591 		if (drm_dev_enter(&adev->ddev, &idx)) {
592 			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
593 			void *ptr = adev->vce.cpu_addr;
594 
595 			memcpy_toio(ptr, adev->vce.saved_bo, size);
596 			drm_dev_exit(idx);
597 		}
598 	} else {
599 		r = amdgpu_vce_resume(adev);
600 		if (r)
601 			return r;
602 	}
603 
604 	return vce_v4_0_hw_init(adev);
605 }
606 
607 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
608 {
609 	uint32_t offset, size;
610 	uint64_t tmr_mc_addr;
611 
612 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
613 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
614 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
615 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
616 
617 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
618 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
619 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
620 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
621 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
622 
623 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
624 
625 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
626 		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
627 										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
628 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
629 			(tmr_mc_addr >> 8));
630 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
631 			(tmr_mc_addr >> 40) & 0xff);
632 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
633 	} else {
634 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
635 			(adev->vce.gpu_addr >> 8));
636 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
637 			(adev->vce.gpu_addr >> 40) & 0xff);
638 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
639 	}
640 
641 	size = VCE_V4_0_FW_SIZE;
642 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
643 
644 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
645 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
646 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
647 	size = VCE_V4_0_STACK_SIZE;
648 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
649 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
650 
651 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
652 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
653 	offset += size;
654 	size = VCE_V4_0_DATA_SIZE;
655 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
656 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
657 
658 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
659 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
660 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
661 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
662 }
663 
664 static int vce_v4_0_set_clockgating_state(void *handle,
665 					  enum amd_clockgating_state state)
666 {
667 	/* needed for driver unload*/
668 	return 0;
669 }
670 
671 #if 0
672 static bool vce_v4_0_is_idle(void *handle)
673 {
674 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
675 	u32 mask = 0;
676 
677 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
678 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
679 
680 	return !(RREG32(mmSRBM_STATUS2) & mask);
681 }
682 
683 static int vce_v4_0_wait_for_idle(void *handle)
684 {
685 	unsigned i;
686 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
687 
688 	for (i = 0; i < adev->usec_timeout; i++)
689 		if (vce_v4_0_is_idle(handle))
690 			return 0;
691 
692 	return -ETIMEDOUT;
693 }
694 
/* VCE_STATUS busy bits reported by the VCPU */
#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
/* bits treated as "busy" by the soft reset check: AUTO_BUSY and RB0_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
700 
701 static bool vce_v4_0_check_soft_reset(void *handle)
702 {
703 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
704 	u32 srbm_soft_reset = 0;
705 
706 	/* According to VCE team , we should use VCE_STATUS instead
707 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
708 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
709 	 * instance's registers are accessed
710 	 * (0 for 1st instance, 10 for 2nd instance).
711 	 *
712 	 *VCE_STATUS
713 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
714 	 *|----+----+-----------+----+----+----+----------+---------+----|
715 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
716 	 *
717 	 * VCE team suggest use bit 3--bit 6 for busy status check
718 	 */
719 	mutex_lock(&adev->grbm_idx_mutex);
720 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
721 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
722 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
723 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
724 	}
725 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
726 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
727 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
728 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
729 	}
730 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
731 	mutex_unlock(&adev->grbm_idx_mutex);
732 
733 	if (srbm_soft_reset) {
734 		adev->vce.srbm_soft_reset = srbm_soft_reset;
735 		return true;
736 	} else {
737 		adev->vce.srbm_soft_reset = 0;
738 		return false;
739 	}
740 }
741 
742 static int vce_v4_0_soft_reset(void *handle)
743 {
744 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
745 	u32 srbm_soft_reset;
746 
747 	if (!adev->vce.srbm_soft_reset)
748 		return 0;
749 	srbm_soft_reset = adev->vce.srbm_soft_reset;
750 
751 	if (srbm_soft_reset) {
752 		u32 tmp;
753 
754 		tmp = RREG32(mmSRBM_SOFT_RESET);
755 		tmp |= srbm_soft_reset;
756 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
757 		WREG32(mmSRBM_SOFT_RESET, tmp);
758 		tmp = RREG32(mmSRBM_SOFT_RESET);
759 
760 		udelay(50);
761 
762 		tmp &= ~srbm_soft_reset;
763 		WREG32(mmSRBM_SOFT_RESET, tmp);
764 		tmp = RREG32(mmSRBM_SOFT_RESET);
765 
766 		/* Wait a little for things to settle down */
767 		udelay(50);
768 	}
769 
770 	return 0;
771 }
772 
773 static int vce_v4_0_pre_soft_reset(void *handle)
774 {
775 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
776 
777 	if (!adev->vce.srbm_soft_reset)
778 		return 0;
779 
780 	mdelay(5);
781 
782 	return vce_v4_0_suspend(adev);
783 }
784 
785 
786 static int vce_v4_0_post_soft_reset(void *handle)
787 {
788 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
789 
790 	if (!adev->vce.srbm_soft_reset)
791 		return 0;
792 
793 	mdelay(5);
794 
795 	return vce_v4_0_resume(adev);
796 }
797 
798 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
799 {
800 	u32 tmp, data;
801 
802 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
803 	if (override)
804 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
805 	else
806 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
807 
808 	if (tmp != data)
809 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
810 }
811 
812 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
813 					     bool gated)
814 {
815 	u32 data;
816 
817 	/* Set Override to disable Clock Gating */
818 	vce_v4_0_override_vce_clock_gating(adev, true);
819 
820 	/* This function enables MGCG which is controlled by firmware.
821 	   With the clocks in the gated state the core is still
822 	   accessible but the firmware will throttle the clocks on the
823 	   fly as necessary.
824 	*/
825 	if (gated) {
826 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
827 		data |= 0x1ff;
828 		data &= ~0xef0000;
829 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
830 
831 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
832 		data |= 0x3ff000;
833 		data &= ~0xffc00000;
834 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
835 
836 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
837 		data |= 0x2;
838 		data &= ~0x00010000;
839 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
840 
841 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
842 		data |= 0x37f;
843 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
844 
845 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
846 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
847 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
848 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
849 			0x8;
850 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
851 	} else {
852 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
853 		data &= ~0x80010;
854 		data |= 0xe70008;
855 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
856 
857 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
858 		data |= 0xffc00000;
859 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
860 
861 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
862 		data |= 0x10000;
863 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
864 
865 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
866 		data &= ~0xffc00000;
867 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
868 
869 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
870 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
871 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
872 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
873 			  0x8);
874 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
875 	}
876 	vce_v4_0_override_vce_clock_gating(adev, false);
877 }
878 
879 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
880 {
881 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
882 
883 	if (enable)
884 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
885 	else
886 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
887 
888 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
889 }
890 
891 static int vce_v4_0_set_clockgating_state(void *handle,
892 					  enum amd_clockgating_state state)
893 {
894 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
895 	bool enable = (state == AMD_CG_STATE_GATE);
896 	int i;
897 
898 	if ((adev->asic_type == CHIP_POLARIS10) ||
899 		(adev->asic_type == CHIP_TONGA) ||
900 		(adev->asic_type == CHIP_FIJI))
901 		vce_v4_0_set_bypass_mode(adev, enable);
902 
903 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
904 		return 0;
905 
906 	mutex_lock(&adev->grbm_idx_mutex);
907 	for (i = 0; i < 2; i++) {
908 		/* Program VCE Instance 0 or 1 if not harvested */
909 		if (adev->vce.harvest_config & (1 << i))
910 			continue;
911 
912 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
913 
914 		if (enable) {
915 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
916 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
917 			data &= ~(0xf | 0xff0);
918 			data |= ((0x0 << 0) | (0x04 << 4));
919 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
920 
921 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
922 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
923 			data &= ~(0xf | 0xff0);
924 			data |= ((0x0 << 0) | (0x04 << 4));
925 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
926 		}
927 
928 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
929 	}
930 
931 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
932 	mutex_unlock(&adev->grbm_idx_mutex);
933 
934 	return 0;
935 }
936 #endif
937 
938 static int vce_v4_0_set_powergating_state(void *handle,
939 					  enum amd_powergating_state state)
940 {
941 	/* This doesn't actually powergate the VCE block.
942 	 * That's done in the dpm code via the SMC.  This
943 	 * just re-inits the block as necessary.  The actual
944 	 * gating still happens in the dpm code.  We should
945 	 * revisit this when there is a cleaner line between
946 	 * the smc and the hw blocks
947 	 */
948 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
949 
950 	if (state == AMD_PG_STATE_GATE)
951 		return vce_v4_0_stop(adev);
952 	else
953 		return vce_v4_0_start(adev);
954 }
955 
956 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
957 					struct amdgpu_ib *ib, uint32_t flags)
958 {
959 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
960 
961 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
962 	amdgpu_ring_write(ring, vmid);
963 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
964 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
965 	amdgpu_ring_write(ring, ib->length_dw);
966 }
967 
968 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
969 			u64 seq, unsigned flags)
970 {
971 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
972 
973 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
974 	amdgpu_ring_write(ring, addr);
975 	amdgpu_ring_write(ring, upper_32_bits(addr));
976 	amdgpu_ring_write(ring, seq);
977 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
978 }
979 
980 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
981 {
982 	amdgpu_ring_write(ring, VCE_CMD_END);
983 }
984 
985 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
986 				   uint32_t val, uint32_t mask)
987 {
988 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
989 	amdgpu_ring_write(ring,	reg << 2);
990 	amdgpu_ring_write(ring, mask);
991 	amdgpu_ring_write(ring, val);
992 }
993 
994 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
995 				   unsigned int vmid, uint64_t pd_addr)
996 {
997 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
998 
999 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1000 
1001 	/* wait for reg writes */
1002 	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
1003 			       vmid * hub->ctx_addr_distance,
1004 			       lower_32_bits(pd_addr), 0xffffffff);
1005 }
1006 
1007 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1008 			       uint32_t reg, uint32_t val)
1009 {
1010 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1011 	amdgpu_ring_write(ring,	reg << 2);
1012 	amdgpu_ring_write(ring, val);
1013 }
1014 
1015 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1016 					struct amdgpu_irq_src *source,
1017 					unsigned type,
1018 					enum amdgpu_interrupt_state state)
1019 {
1020 	uint32_t val = 0;
1021 
1022 	if (!amdgpu_sriov_vf(adev)) {
1023 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1024 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1025 
1026 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1027 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1028 	}
1029 	return 0;
1030 }
1031 
1032 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1033 				      struct amdgpu_irq_src *source,
1034 				      struct amdgpu_iv_entry *entry)
1035 {
1036 	DRM_DEBUG("IH: VCE\n");
1037 
1038 	switch (entry->src_data[0]) {
1039 	case 0:
1040 	case 1:
1041 	case 2:
1042 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1043 		break;
1044 	default:
1045 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1046 			  entry->src_id, entry->src_data[0]);
1047 		break;
1048 	}
1049 
1050 	return 0;
1051 }
1052 
1053 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1054 	.name = "vce_v4_0",
1055 	.early_init = vce_v4_0_early_init,
1056 	.late_init = NULL,
1057 	.sw_init = vce_v4_0_sw_init,
1058 	.sw_fini = vce_v4_0_sw_fini,
1059 	.hw_init = vce_v4_0_hw_init,
1060 	.hw_fini = vce_v4_0_hw_fini,
1061 	.suspend = vce_v4_0_suspend,
1062 	.resume = vce_v4_0_resume,
1063 	.is_idle = NULL /* vce_v4_0_is_idle */,
1064 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1065 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1066 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1067 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1068 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1069 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1070 	.set_powergating_state = vce_v4_0_set_powergating_state,
1071 };
1072 
1073 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1074 	.type = AMDGPU_RING_TYPE_VCE,
1075 	.align_mask = 0x3f,
1076 	.nop = VCE_CMD_NO_OP,
1077 	.support_64bit_ptrs = false,
1078 	.no_user_fence = true,
1079 	.vmhub = AMDGPU_MMHUB_0,
1080 	.get_rptr = vce_v4_0_ring_get_rptr,
1081 	.get_wptr = vce_v4_0_ring_get_wptr,
1082 	.set_wptr = vce_v4_0_ring_set_wptr,
1083 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1084 	.emit_frame_size =
1085 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1086 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1087 		4 + /* vce_v4_0_emit_vm_flush */
1088 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1089 		1, /* vce_v4_0_ring_insert_end */
1090 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1091 	.emit_ib = vce_v4_0_ring_emit_ib,
1092 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1093 	.emit_fence = vce_v4_0_ring_emit_fence,
1094 	.test_ring = amdgpu_vce_ring_test_ring,
1095 	.test_ib = amdgpu_vce_ring_test_ib,
1096 	.insert_nop = amdgpu_ring_insert_nop,
1097 	.insert_end = vce_v4_0_ring_insert_end,
1098 	.pad_ib = amdgpu_ring_generic_pad_ib,
1099 	.begin_use = amdgpu_vce_ring_begin_use,
1100 	.end_use = amdgpu_vce_ring_end_use,
1101 	.emit_wreg = vce_v4_0_emit_wreg,
1102 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
1103 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1104 };
1105 
1106 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1107 {
1108 	int i;
1109 
1110 	for (i = 0; i < adev->vce.num_rings; i++) {
1111 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1112 		adev->vce.ring[i].me = i;
1113 	}
1114 	DRM_INFO("VCE enabled in VM mode\n");
1115 }
1116 
1117 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1118 	.set = vce_v4_0_set_interrupt_state,
1119 	.process = vce_v4_0_process_interrupt,
1120 };
1121 
1122 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1123 {
1124 	adev->vce.irq.num_types = 1;
1125 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1126 };
1127 
1128 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1129 {
1130 	.type = AMD_IP_BLOCK_TYPE_VCE,
1131 	.major = 4,
1132 	.minor = 0,
1133 	.rev = 0,
1134 	.funcs = &vce_v4_0_ip_funcs,
1135 };
1136