xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision 2c7e7835)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15.h"
32 #include "soc15d.h"
33 #include "soc15_common.h"
34 #include "mmsch_v1_0.h"
35 
36 #include "vce/vce_4_0_offset.h"
37 #include "vce/vce_4_0_default.h"
38 #include "vce/vce_4_0_sh_mask.h"
39 #include "mmhub/mmhub_1_0_offset.h"
40 #include "mmhub/mmhub_1_0_sh_mask.h"
41 
42 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
43 
44 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
45 
46 #define VCE_V4_0_FW_SIZE	(384 * 1024)
47 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
48 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
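/* Sizes of the firmware image, stack and data regions that are mapped
 * into the three VCPU cache windows (OFFSET0/1/2) in vce_v4_0_mc_resume()
 * and vce_v4_0_sriov_start() below.
 */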
49 
50 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
51 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
52 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
53 
54 /**
55  * vce_v4_0_ring_get_rptr - get read pointer
56  *
57  * @ring: amdgpu_ring pointer
58  *
59  * Returns the current hardware read pointer
60  */
61 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
62 {
63 	struct amdgpu_device *adev = ring->adev;
64 
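	/* ring->me selects which hardware ring register set to read:
	 * 0 -> VCE_RB_RPTR, 1 -> VCE_RB_RPTR2, otherwise VCE_RB_RPTR3.
	 */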
65 	if (ring->me == 0)
66 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
67 	else if (ring->me == 1)
68 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
69 	else
70 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
71 }
72 
73 /**
74  * vce_v4_0_ring_get_wptr - get write pointer
75  *
76  * @ring: amdgpu_ring pointer
77  *
78  * Returns the current hardware write pointer
79  */
80 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
81 {
82 	struct amdgpu_device *adev = ring->adev;
83 
84 	if (ring->use_doorbell)
85 		return adev->wb.wb[ring->wptr_offs];
86 
87 	if (ring->me == 0)
88 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
89 	else if (ring->me == 1)
90 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
91 	else
92 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
93 }
94 
95 /**
96  * vce_v4_0_ring_set_wptr - set write pointer
97  *
98  * @ring: amdgpu_ring pointer
99  *
100  * Commits the write pointer to the hardware
101  */
102 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
103 {
104 	struct amdgpu_device *adev = ring->adev;
105 
106 	if (ring->use_doorbell) {
107 		/* XXX check if swapping is necessary on BE */
108 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
109 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
110 		return;
111 	}
112 
113 	if (ring->me == 0)
114 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
115 			lower_32_bits(ring->wptr));
116 	else if (ring->me == 1)
117 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
118 			lower_32_bits(ring->wptr));
119 	else
120 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
121 			lower_32_bits(ring->wptr));
122 }
123 
124 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
125 {
126 	int i, j;
127 
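	/* Poll VCE_STATUS for the firmware-loaded flag for up to ~1s
	 * (100 * 10ms); if the VCPU still hasn't reported in, toggle the
	 * ECPU soft reset and retry, up to 10 attempts in total.
	 */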
128 	for (i = 0; i < 10; ++i) {
129 		for (j = 0; j < 100; ++j) {
130 			uint32_t status =
131 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
132 
133 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
134 				return 0;
135 			mdelay(10);
136 		}
137 
138 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
139 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
140 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
141 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
142 		mdelay(10);
143 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
144 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
145 		mdelay(10);
146 
147 	}
148 
149 	return -ETIMEDOUT;
150 }
151 
152 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
153 				struct amdgpu_mm_table *table)
154 {
155 	uint32_t data = 0, loop;
156 	uint64_t addr = table->gpu_addr;
157 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
158 	uint32_t size;
159 
160 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
161 
162 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
163 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
164 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
165 
166 	/* 2, update vmid of descriptor */
167 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
168 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
169 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
170 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
171 
172 	/* 3, notify mmsch about the size of this descriptor */
173 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
174 
175 	/* 4, set resp to zero */
176 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
177 
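	/* Reset the software state of ring 0 so it starts from a zero
	 * write pointer once the MMSCH has programmed the ring registers.
	 */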
178 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
179 	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
180 	adev->vce.ring[0].wptr = 0;
181 	adev->vce.ring[0].wptr_old = 0;
182 
183 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP reports completion (0x10000002) */
184 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
185 
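	/* Poll the response mailbox for up to ~10ms (1000 * 10us) before
	 * giving up with -EBUSY.
	 */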
186 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
187 	loop = 1000;
188 	while ((data & 0x10000002) != 0x10000002) {
189 		udelay(10);
190 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
191 		loop--;
192 		if (!loop)
193 			break;
194 	}
195 
196 	if (!loop) {
197 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
198 		return -EBUSY;
199 	}
200 
201 	return 0;
202 }
203 
204 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
205 {
206 	struct amdgpu_ring *ring;
207 	uint32_t offset, size;
208 	uint32_t table_size = 0;
209 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
210 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
211 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
212 	struct mmsch_v1_0_cmd_end end = { { 0 } };
213 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
214 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
215 
216 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
217 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
218 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
219 	end.cmd_header.command_type = MMSCH_COMMAND__END;
220 
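	/* Under SRIOV the guest cannot program the VCE registers directly;
	 * instead, build a table of direct write/read-modify-write/poll
	 * packets that the MMSCH firmware executes on the VF's behalf.
	 */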
221 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
222 		header->version = MMSCH_VERSION;
223 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
224 
225 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
226 			header->vce_table_offset = header->header_size;
227 		else
228 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
229 
230 		init_table += header->vce_table_offset;
231 
232 		ring = &adev->vce.ring[0];
233 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
234 					    lower_32_bits(ring->gpu_addr));
235 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
236 					    upper_32_bits(ring->gpu_addr));
237 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
238 					    ring->ring_size / 4);
239 
240 		/* BEGIN OF MC_RESUME */
241 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
242 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
243 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
244 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
245 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
246 
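		/* The 40-bit BAR takes bits 39:8 of the region base address and
		 * the 64-bit BAR bits 47:40.  With PSP firmware loading the
		 * VCPU cache points at the PSP-managed firmware copy, otherwise
		 * at the driver's own VCPU buffer.
		 */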
247 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
248 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
249 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
250 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
251 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
252 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
253 						(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
254 		} else {
255 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
256 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
257 						adev->vce.gpu_addr >> 8);
258 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
259 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
260 						(adev->vce.gpu_addr >> 40) & 0xff);
261 		}
262 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
263 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
264 						adev->vce.gpu_addr >> 8);
265 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
266 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
267 						(adev->vce.gpu_addr >> 40) & 0xff);
268 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
269 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
270 						adev->vce.gpu_addr >> 8);
271 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
272 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
273 						(adev->vce.gpu_addr >> 40) & 0xff);
274 
275 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
276 		size = VCE_V4_0_FW_SIZE;
277 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
278 					offset & ~0x0f000000);
279 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
280 
281 		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
282 		size = VCE_V4_0_STACK_SIZE;
283 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
284 					(offset & ~0x0f000000) | (1 << 24));
285 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
286 
287 		offset += size;
288 		size = VCE_V4_0_DATA_SIZE;
289 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
290 					(offset & ~0x0f000000) | (2 << 24));
291 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
292 
293 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
294 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
295 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
296 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
297 
298 		/* end of MC_RESUME */
299 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
300 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
301 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
302 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
303 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
304 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
305 
306 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
307 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
308 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
309 
310 		/* clear BUSY flag */
311 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
312 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
313 
314 		/* add end packet */
315 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
316 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
317 		header->vce_table_size = table_size;
318 	}
319 
320 	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
321 }
322 
323 /**
324  * vce_v4_0_start - start VCE block
325  *
326  * @adev: amdgpu_device pointer
327  *
328  * Setup and start the VCE block
329  */
330 static int vce_v4_0_start(struct amdgpu_device *adev)
331 {
332 	struct amdgpu_ring *ring;
333 	int r;
334 
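	/* Program the ring buffer registers for all three rings, resume the
	 * memory controller setup, then release the ECPU soft reset and wait
	 * for the firmware to report that it has loaded.
	 */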
335 	ring = &adev->vce.ring[0];
336 
337 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
338 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
339 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
340 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
341 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
342 
343 	ring = &adev->vce.ring[1];
344 
345 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
346 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
347 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
348 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
349 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
350 
351 	ring = &adev->vce.ring[2];
352 
353 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
354 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
355 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
356 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
357 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
358 
359 	vce_v4_0_mc_resume(adev);
360 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
361 			~VCE_STATUS__JOB_BUSY_MASK);
362 
363 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
364 
365 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
366 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
367 	mdelay(100);
368 
369 	r = vce_v4_0_firmware_loaded(adev);
370 
371 	/* clear BUSY flag */
372 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
373 
374 	if (r) {
375 		DRM_ERROR("VCE not responding, giving up!!!\n");
376 		return r;
377 	}
378 
379 	return 0;
380 }
381 
382 static int vce_v4_0_stop(struct amdgpu_device *adev)
383 {
384 
385 	/* Disable VCPU */
386 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
387 
388 	/* hold on ECPU */
389 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
390 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
391 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
392 
393 	/* clear VCE_STATUS */
394 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
395 
396 	/* Set Clock-Gating off */
397 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
398 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
399 	*/
400 
401 	return 0;
402 }
403 
404 static int vce_v4_0_early_init(void *handle)
405 {
406 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
407 
408 	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
409 		adev->vce.num_rings = 1;
410 	else
411 		adev->vce.num_rings = 3;
412 
413 	vce_v4_0_set_ring_funcs(adev);
414 	vce_v4_0_set_irq_funcs(adev);
415 
416 	return 0;
417 }
418 
419 static int vce_v4_0_sw_init(void *handle)
420 {
421 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
422 	struct amdgpu_ring *ring;
423 
424 	unsigned size;
425 	int r, i;
426 
427 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
428 	if (r)
429 		return r;
430 
431 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
432 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
433 		size += VCE_V4_0_FW_SIZE;
434 
435 	r = amdgpu_vce_sw_init(adev, size);
436 	if (r)
437 		return r;
438 
439 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
440 		const struct common_firmware_header *hdr;
441 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
442 
443 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
444 		if (!adev->vce.saved_bo)
445 			return -ENOMEM;
446 
447 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
448 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
449 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
450 		adev->firmware.fw_size +=
451 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
452 		DRM_INFO("PSP loading VCE firmware\n");
453 	} else {
454 		r = amdgpu_vce_resume(adev);
455 		if (r)
456 			return r;
457 	}
458 
459 	for (i = 0; i < adev->vce.num_rings; i++) {
460 		ring = &adev->vce.ring[i];
461 		sprintf(ring->name, "vce%d", i);
462 		if (amdgpu_sriov_vf(adev)) {
463 			/* DOORBELL only works under SRIOV */
464 			ring->use_doorbell = true;
465 
466 			/* Currently only the first encoding ring is used under SRIOV,
467 			 * so point the remaining rings at an unused doorbell location.
468 			 */
469 			if (i == 0)
470 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
471 			else
472 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
473 		}
474 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
475 		if (r)
476 			return r;
477 	}
478 
479 
480 	r = amdgpu_vce_entity_init(adev);
481 	if (r)
482 		return r;
483 
484 	r = amdgpu_virt_alloc_mm_table(adev);
485 	if (r)
486 		return r;
487 
488 	return r;
489 }
490 
491 static int vce_v4_0_sw_fini(void *handle)
492 {
493 	int r;
494 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
495 
496 	/* free MM table */
497 	amdgpu_virt_free_mm_table(adev);
498 
499 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
500 		kvfree(adev->vce.saved_bo);
501 		adev->vce.saved_bo = NULL;
502 	}
503 
504 	r = amdgpu_vce_suspend(adev);
505 	if (r)
506 		return r;
507 
508 	return amdgpu_vce_sw_fini(adev);
509 }
510 
511 static int vce_v4_0_hw_init(void *handle)
512 {
513 	int r, i;
514 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
515 
516 	if (amdgpu_sriov_vf(adev))
517 		r = vce_v4_0_sriov_start(adev);
518 	else
519 		r = vce_v4_0_start(adev);
520 	if (r)
521 		return r;
522 
523 	for (i = 0; i < adev->vce.num_rings; i++) {
524 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
525 		if (r)
526 			return r;
527 	}
528 
529 	DRM_INFO("VCE initialized successfully.\n");
530 
531 	return 0;
532 }
533 
534 static int vce_v4_0_hw_fini(void *handle)
535 {
536 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
537 	int i;
538 
539 	if (!amdgpu_sriov_vf(adev)) {
540 		/* vce_v4_0_wait_for_idle(handle); */
541 		vce_v4_0_stop(adev);
542 	} else {
543 		/* full access mode, so don't touch any VCE register */
544 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
545 	}
546 
547 	for (i = 0; i < adev->vce.num_rings; i++)
548 		adev->vce.ring[i].sched.ready = false;
549 
550 	return 0;
551 }
552 
553 static int vce_v4_0_suspend(void *handle)
554 {
555 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
556 	int r;
557 
558 	if (adev->vce.vcpu_bo == NULL)
559 		return 0;
560 
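	/* With PSP firmware loading the VCPU buffer contents are saved here
	 * and restored by vce_v4_0_resume(); the non-PSP path re-uploads the
	 * firmware through amdgpu_vce_resume() instead.
	 */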
561 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
562 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
563 		void *ptr = adev->vce.cpu_addr;
564 
565 		memcpy_fromio(adev->vce.saved_bo, ptr, size);
566 	}
567 
568 	r = vce_v4_0_hw_fini(adev);
569 	if (r)
570 		return r;
571 
572 	return amdgpu_vce_suspend(adev);
573 }
574 
575 static int vce_v4_0_resume(void *handle)
576 {
577 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
578 	int r;
579 
580 	if (adev->vce.vcpu_bo == NULL)
581 		return -EINVAL;
582 
583 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
584 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
585 		void *ptr = adev->vce.cpu_addr;
586 
587 		memcpy_toio(ptr, adev->vce.saved_bo, size);
588 	} else {
589 		r = amdgpu_vce_resume(adev);
590 		if (r)
591 			return r;
592 	}
593 
594 	return vce_v4_0_hw_init(adev);
595 }
596 
597 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
598 {
599 	uint32_t offset, size;
600 	uint64_t tmr_mc_addr;
601 
602 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
603 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
604 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
605 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
606 
607 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
608 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
609 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
610 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
611 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
612 
613 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
614 
615 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
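	/* With PSP loading, cache window 0 points at the firmware image in
	 * the PSP TMR region; otherwise it points at AMDGPU_VCE_FIRMWARE_OFFSET
	 * within the driver-allocated VCPU buffer.
	 */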
616 		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
617 										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
618 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
619 			(tmr_mc_addr >> 8));
620 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
621 			(tmr_mc_addr >> 40) & 0xff);
622 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
623 	} else {
624 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
625 			(adev->vce.gpu_addr >> 8));
626 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
627 			(adev->vce.gpu_addr >> 40) & 0xff);
628 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
629 	}
630 
631 	size = VCE_V4_0_FW_SIZE;
632 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
633 
634 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
635 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
636 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
637 	size = VCE_V4_0_STACK_SIZE;
638 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
639 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
640 
641 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
642 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
643 	offset += size;
644 	size = VCE_V4_0_DATA_SIZE;
645 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
646 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
647 
648 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
649 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
650 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
651 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
652 }
653 
654 static int vce_v4_0_set_clockgating_state(void *handle,
655 					  enum amd_clockgating_state state)
656 {
657 	/* needed for driver unload*/
658 	return 0;
659 }
660 
661 #if 0
662 static bool vce_v4_0_is_idle(void *handle)
663 {
664 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
665 	u32 mask = 0;
666 
667 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
668 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
669 
670 	return !(RREG32(mmSRBM_STATUS2) & mask);
671 }
672 
673 static int vce_v4_0_wait_for_idle(void *handle)
674 {
675 	unsigned i;
676 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
677 
678 	for (i = 0; i < adev->usec_timeout; i++)
679 		if (vce_v4_0_is_idle(handle))
680 			return 0;
681 
682 	return -ETIMEDOUT;
683 }
684 
685 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
686 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
687 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
688 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
689 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
690 
691 static bool vce_v4_0_check_soft_reset(void *handle)
692 {
693 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
694 	u32 srbm_soft_reset = 0;
695 
696 	/* According to the VCE team, we should use VCE_STATUS instead of the
697 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
698 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
699 	 * instance's registers are accessed
700 	 * (0 for the 1st instance, 0x10 for the 2nd instance).
701 	 *
702 	 * VCE_STATUS
703 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
704 	 * |----+----+-----------+----+----+----+----------+---------+----|
705 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
706 	 *
707 	 * The VCE team suggests using bits 3-6 for the busy status check.
708 	 */
709 	mutex_lock(&adev->grbm_idx_mutex);
710 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
711 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
712 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
713 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
714 	}
715 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
716 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
717 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
718 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
719 	}
720 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
721 	mutex_unlock(&adev->grbm_idx_mutex);
722 
723 	if (srbm_soft_reset) {
724 		adev->vce.srbm_soft_reset = srbm_soft_reset;
725 		return true;
726 	} else {
727 		adev->vce.srbm_soft_reset = 0;
728 		return false;
729 	}
730 }
731 
732 static int vce_v4_0_soft_reset(void *handle)
733 {
734 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
735 	u32 srbm_soft_reset;
736 
737 	if (!adev->vce.srbm_soft_reset)
738 		return 0;
739 	srbm_soft_reset = adev->vce.srbm_soft_reset;
740 
741 	if (srbm_soft_reset) {
742 		u32 tmp;
743 
744 		tmp = RREG32(mmSRBM_SOFT_RESET);
745 		tmp |= srbm_soft_reset;
746 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
747 		WREG32(mmSRBM_SOFT_RESET, tmp);
748 		tmp = RREG32(mmSRBM_SOFT_RESET);
749 
750 		udelay(50);
751 
752 		tmp &= ~srbm_soft_reset;
753 		WREG32(mmSRBM_SOFT_RESET, tmp);
754 		tmp = RREG32(mmSRBM_SOFT_RESET);
755 
756 		/* Wait a little for things to settle down */
757 		udelay(50);
758 	}
759 
760 	return 0;
761 }
762 
763 static int vce_v4_0_pre_soft_reset(void *handle)
764 {
765 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
766 
767 	if (!adev->vce.srbm_soft_reset)
768 		return 0;
769 
770 	mdelay(5);
771 
772 	return vce_v4_0_suspend(adev);
773 }
774 
775 
776 static int vce_v4_0_post_soft_reset(void *handle)
777 {
778 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
779 
780 	if (!adev->vce.srbm_soft_reset)
781 		return 0;
782 
783 	mdelay(5);
784 
785 	return vce_v4_0_resume(adev);
786 }
787 
788 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
789 {
790 	u32 tmp, data;
791 
792 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
793 	if (override)
794 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
795 	else
796 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
797 
798 	if (tmp != data)
799 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
800 }
801 
802 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
803 					     bool gated)
804 {
805 	u32 data;
806 
807 	/* Set Override to disable Clock Gating */
808 	vce_v4_0_override_vce_clock_gating(adev, true);
809 
810 	/* This function enables MGCG which is controlled by firmware.
811 	 * With the clocks in the gated state the core is still
812 	 * accessible but the firmware will throttle the clocks on the
813 	 * fly as necessary.
814 	 */
815 	if (gated) {
816 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
817 		data |= 0x1ff;
818 		data &= ~0xef0000;
819 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
820 
821 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
822 		data |= 0x3ff000;
823 		data &= ~0xffc00000;
824 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
825 
826 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
827 		data |= 0x2;
828 		data &= ~0x00010000;
829 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
830 
831 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
832 		data |= 0x37f;
833 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
834 
835 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
836 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
837 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
838 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
839 			0x8;
840 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
841 	} else {
842 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
843 		data &= ~0x80010;
844 		data |= 0xe70008;
845 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
846 
847 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
848 		data |= 0xffc00000;
849 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
850 
851 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
852 		data |= 0x10000;
853 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
854 
855 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
856 		data &= ~0xffc00000;
857 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
858 
859 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
860 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
861 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
862 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
863 			  0x8);
864 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
865 	}
866 	vce_v4_0_override_vce_clock_gating(adev, false);
867 }
868 
869 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
870 {
871 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
872 
873 	if (enable)
874 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
875 	else
876 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
877 
878 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
879 }
880 
881 static int vce_v4_0_set_clockgating_state(void *handle,
882 					  enum amd_clockgating_state state)
883 {
884 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
885 	bool enable = (state == AMD_CG_STATE_GATE);
886 	int i;
887 
888 	if ((adev->asic_type == CHIP_POLARIS10) ||
889 		(adev->asic_type == CHIP_TONGA) ||
890 		(adev->asic_type == CHIP_FIJI))
891 		vce_v4_0_set_bypass_mode(adev, enable);
892 
893 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
894 		return 0;
895 
896 	mutex_lock(&adev->grbm_idx_mutex);
897 	for (i = 0; i < 2; i++) {
898 		/* Program VCE Instance 0 or 1 if not harvested */
899 		if (adev->vce.harvest_config & (1 << i))
900 			continue;
901 
902 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
903 
904 		if (enable) {
905 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
906 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
907 			data &= ~(0xf | 0xff0);
908 			data |= ((0x0 << 0) | (0x04 << 4));
909 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
910 
911 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
912 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
913 			data &= ~(0xf | 0xff0);
914 			data |= ((0x0 << 0) | (0x04 << 4));
915 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
916 		}
917 
918 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
919 	}
920 
921 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
922 	mutex_unlock(&adev->grbm_idx_mutex);
923 
924 	return 0;
925 }
926 #endif
927 
928 static int vce_v4_0_set_powergating_state(void *handle,
929 					  enum amd_powergating_state state)
930 {
931 	/* This doesn't actually powergate the VCE block.
932 	 * That's done in the dpm code via the SMC.  This
933 	 * just re-inits the block as necessary.  The actual
934 	 * gating still happens in the dpm code.  We should
935 	 * revisit this when there is a cleaner line between
936 	 * the smc and the hw blocks
937 	 */
938 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
939 
940 	if (state == AMD_PG_STATE_GATE)
941 		return vce_v4_0_stop(adev);
942 	else
943 		return vce_v4_0_start(adev);
944 }
945 
946 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
947 					struct amdgpu_ib *ib, uint32_t flags)
948 {
949 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
950 
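	/* IB packet layout: command, VMID, 64-bit IB GPU address split into
	 * low/high dwords, then the IB length in dwords.
	 */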
951 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
952 	amdgpu_ring_write(ring, vmid);
953 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
954 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
955 	amdgpu_ring_write(ring, ib->length_dw);
956 }
957 
958 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
959 			u64 seq, unsigned flags)
960 {
961 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
962 
963 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
964 	amdgpu_ring_write(ring, addr);
965 	amdgpu_ring_write(ring, upper_32_bits(addr));
966 	amdgpu_ring_write(ring, seq);
967 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
968 }
969 
970 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
971 {
972 	amdgpu_ring_write(ring, VCE_CMD_END);
973 }
974 
975 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
976 				   uint32_t val, uint32_t mask)
977 {
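	/* The register offset is a dword index; the command stream expects a
	 * byte address, hence the << 2 conversion below.
	 */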
978 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
979 	amdgpu_ring_write(ring,	reg << 2);
980 	amdgpu_ring_write(ring, mask);
981 	amdgpu_ring_write(ring, val);
982 }
983 
984 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
985 				   unsigned int vmid, uint64_t pd_addr)
986 {
987 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
988 
989 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
990 
991 	/* wait for reg writes */
992 	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
993 			       lower_32_bits(pd_addr), 0xffffffff);
994 }
995 
996 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
997 			       uint32_t reg, uint32_t val)
998 {
999 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1000 	amdgpu_ring_write(ring,	reg << 2);
1001 	amdgpu_ring_write(ring, val);
1002 }
1003 
1004 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1005 					struct amdgpu_irq_src *source,
1006 					unsigned type,
1007 					enum amdgpu_interrupt_state state)
1008 {
1009 	uint32_t val = 0;
1010 
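	/* Only program the interrupt enable on bare metal; under SRIOV the
	 * register is left to the host.
	 */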
1011 	if (!amdgpu_sriov_vf(adev)) {
1012 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1013 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1014 
1015 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1016 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1017 	}
1018 	return 0;
1019 }
1020 
1021 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1022 				      struct amdgpu_irq_src *source,
1023 				      struct amdgpu_iv_entry *entry)
1024 {
1025 	DRM_DEBUG("IH: VCE\n");
1026 
1027 	switch (entry->src_data[0]) {
1028 	case 0:
1029 	case 1:
1030 	case 2:
1031 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1032 		break;
1033 	default:
1034 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1035 			  entry->src_id, entry->src_data[0]);
1036 		break;
1037 	}
1038 
1039 	return 0;
1040 }
1041 
1042 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1043 	.name = "vce_v4_0",
1044 	.early_init = vce_v4_0_early_init,
1045 	.late_init = NULL,
1046 	.sw_init = vce_v4_0_sw_init,
1047 	.sw_fini = vce_v4_0_sw_fini,
1048 	.hw_init = vce_v4_0_hw_init,
1049 	.hw_fini = vce_v4_0_hw_fini,
1050 	.suspend = vce_v4_0_suspend,
1051 	.resume = vce_v4_0_resume,
1052 	.is_idle = NULL /* vce_v4_0_is_idle */,
1053 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1054 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1055 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1056 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1057 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1058 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1059 	.set_powergating_state = vce_v4_0_set_powergating_state,
1060 };
1061 
1062 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1063 	.type = AMDGPU_RING_TYPE_VCE,
1064 	.align_mask = 0x3f,
1065 	.nop = VCE_CMD_NO_OP,
1066 	.support_64bit_ptrs = false,
1067 	.vmhub = AMDGPU_MMHUB,
1068 	.get_rptr = vce_v4_0_ring_get_rptr,
1069 	.get_wptr = vce_v4_0_ring_get_wptr,
1070 	.set_wptr = vce_v4_0_ring_set_wptr,
1071 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1072 	.emit_frame_size =
1073 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1074 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1075 		4 + /* vce_v4_0_emit_vm_flush */
1076 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1077 		1, /* vce_v4_0_ring_insert_end */
1078 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1079 	.emit_ib = vce_v4_0_ring_emit_ib,
1080 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1081 	.emit_fence = vce_v4_0_ring_emit_fence,
1082 	.test_ring = amdgpu_vce_ring_test_ring,
1083 	.test_ib = amdgpu_vce_ring_test_ib,
1084 	.insert_nop = amdgpu_ring_insert_nop,
1085 	.insert_end = vce_v4_0_ring_insert_end,
1086 	.pad_ib = amdgpu_ring_generic_pad_ib,
1087 	.begin_use = amdgpu_vce_ring_begin_use,
1088 	.end_use = amdgpu_vce_ring_end_use,
1089 	.emit_wreg = vce_v4_0_emit_wreg,
1090 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
1091 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1092 };
1093 
1094 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1095 {
1096 	int i;
1097 
1098 	for (i = 0; i < adev->vce.num_rings; i++) {
1099 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1100 		adev->vce.ring[i].me = i;
1101 	}
1102 	DRM_INFO("VCE enabled in VM mode\n");
1103 }
1104 
1105 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1106 	.set = vce_v4_0_set_interrupt_state,
1107 	.process = vce_v4_0_process_interrupt,
1108 };
1109 
1110 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1111 {
1112 	adev->vce.irq.num_types = 1;
1113 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1114 }
1115 
1116 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1117 {
1118 	.type = AMD_IP_BLOCK_TYPE_VCE,
1119 	.major = 4,
1120 	.minor = 0,
1121 	.rev = 0,
1122 	.funcs = &vce_v4_0_ip_funcs,
1123 };
1124