xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision ba61bb17496d1664bf7c5c2fd650d5fd78bd0a92)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15.h"
32 #include "soc15d.h"
33 #include "soc15_common.h"
34 #include "mmsch_v1_0.h"
35 
36 #include "vce/vce_4_0_offset.h"
37 #include "vce/vce_4_0_default.h"
38 #include "vce/vce_4_0_sh_mask.h"
39 #include "mmhub/mmhub_1_0_offset.h"
40 #include "mmhub/mmhub_1_0_sh_mask.h"
41 
/* Bit of VCE_STATUS that vce_v4_0_firmware_loaded() polls for. */
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

/*
 * Byte sizes written to the VCE_VCPU_CACHE_SIZE0/1/2 registers:
 * firmware (384 KiB), stack (64 KiB) and data (16 KiB per VCE handle
 * plus a fixed 52 KiB).
 */
#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

/* Forward declarations for functions that are called before they are defined. */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
51 
52 /**
53  * vce_v4_0_ring_get_rptr - get read pointer
54  *
55  * @ring: amdgpu_ring pointer
56  *
57  * Returns the current hardware read pointer
58  */
59 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
60 {
61 	struct amdgpu_device *adev = ring->adev;
62 
63 	if (ring->me == 0)
64 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
65 	else if (ring->me == 1)
66 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
67 	else
68 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
69 }
70 
71 /**
72  * vce_v4_0_ring_get_wptr - get write pointer
73  *
74  * @ring: amdgpu_ring pointer
75  *
76  * Returns the current hardware write pointer
77  */
78 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
79 {
80 	struct amdgpu_device *adev = ring->adev;
81 
82 	if (ring->use_doorbell)
83 		return adev->wb.wb[ring->wptr_offs];
84 
85 	if (ring->me == 0)
86 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
87 	else if (ring->me == 1)
88 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
89 	else
90 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
91 }
92 
93 /**
94  * vce_v4_0_ring_set_wptr - set write pointer
95  *
96  * @ring: amdgpu_ring pointer
97  *
98  * Commits the write pointer to the hardware
99  */
100 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
101 {
102 	struct amdgpu_device *adev = ring->adev;
103 
104 	if (ring->use_doorbell) {
105 		/* XXX check if swapping is necessary on BE */
106 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
107 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
108 		return;
109 	}
110 
111 	if (ring->me == 0)
112 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
113 			lower_32_bits(ring->wptr));
114 	else if (ring->me == 1)
115 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
116 			lower_32_bits(ring->wptr));
117 	else
118 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
119 			lower_32_bits(ring->wptr));
120 }
121 
122 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
123 {
124 	int i, j;
125 
126 	for (i = 0; i < 10; ++i) {
127 		for (j = 0; j < 100; ++j) {
128 			uint32_t status =
129 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
130 
131 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
132 				return 0;
133 			mdelay(10);
134 		}
135 
136 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
137 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
138 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
139 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
140 		mdelay(10);
141 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
142 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 		mdelay(10);
144 
145 	}
146 
147 	return -ETIMEDOUT;
148 }
149 
150 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
151 				struct amdgpu_mm_table *table)
152 {
153 	uint32_t data = 0, loop;
154 	uint64_t addr = table->gpu_addr;
155 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
156 	uint32_t size;
157 
158 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
159 
160 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
161 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
162 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
163 
164 	/* 2, update vmid of descriptor */
165 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
166 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
167 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
168 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
169 
170 	/* 3, notify mmsch about the size of this descriptor */
171 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
172 
173 	/* 4, set resp to zero */
174 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
175 
176 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
177 	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
178 	adev->vce.ring[0].wptr = 0;
179 	adev->vce.ring[0].wptr_old = 0;
180 
181 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
182 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
183 
184 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
185 	loop = 1000;
186 	while ((data & 0x10000002) != 0x10000002) {
187 		udelay(10);
188 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
189 		loop--;
190 		if (!loop)
191 			break;
192 	}
193 
194 	if (!loop) {
195 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
196 		return -EBUSY;
197 	}
198 
199 	return 0;
200 }
201 
202 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
203 {
204 	struct amdgpu_ring *ring;
205 	uint32_t offset, size;
206 	uint32_t table_size = 0;
207 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
208 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
209 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
210 	struct mmsch_v1_0_cmd_end end = { { 0 } };
211 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
212 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
213 
214 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
215 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
216 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
217 	end.cmd_header.command_type = MMSCH_COMMAND__END;
218 
219 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
220 		header->version = MMSCH_VERSION;
221 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
222 
223 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
224 			header->vce_table_offset = header->header_size;
225 		else
226 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
227 
228 		init_table += header->vce_table_offset;
229 
230 		ring = &adev->vce.ring[0];
231 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
232 					    lower_32_bits(ring->gpu_addr));
233 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
234 					    upper_32_bits(ring->gpu_addr));
235 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
236 					    ring->ring_size / 4);
237 
238 		/* BEGING OF MC_RESUME */
239 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
240 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
241 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
242 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
243 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
244 
245 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
246 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
247 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
248 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
249 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
250 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
251 						(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
252 		} else {
253 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
254 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
255 						adev->vce.gpu_addr >> 8);
256 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
257 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
258 						(adev->vce.gpu_addr >> 40) & 0xff);
259 		}
260 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
261 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
262 						adev->vce.gpu_addr >> 8);
263 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
264 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
265 						(adev->vce.gpu_addr >> 40) & 0xff);
266 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
267 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
268 						adev->vce.gpu_addr >> 8);
269 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
270 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
271 						(adev->vce.gpu_addr >> 40) & 0xff);
272 
273 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
274 		size = VCE_V4_0_FW_SIZE;
275 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
276 					offset & ~0x0f000000);
277 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
278 
279 		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
280 		size = VCE_V4_0_STACK_SIZE;
281 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
282 					(offset & ~0x0f000000) | (1 << 24));
283 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
284 
285 		offset += size;
286 		size = VCE_V4_0_DATA_SIZE;
287 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
288 					(offset & ~0x0f000000) | (2 << 24));
289 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
290 
291 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
292 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
293 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
294 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
295 
296 		/* end of MC_RESUME */
297 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
298 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
299 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
300 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
301 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
302 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
303 
304 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
305 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
306 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
307 
308 		/* clear BUSY flag */
309 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
310 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
311 
312 		/* add end packet */
313 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
314 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
315 		header->vce_table_size = table_size;
316 	}
317 
318 	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
319 }
320 
321 /**
322  * vce_v4_0_start - start VCE block
323  *
324  * @adev: amdgpu_device pointer
325  *
326  * Setup and start the VCE block
327  */
328 static int vce_v4_0_start(struct amdgpu_device *adev)
329 {
330 	struct amdgpu_ring *ring;
331 	int r;
332 
333 	ring = &adev->vce.ring[0];
334 
335 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
336 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
337 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
338 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
339 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
340 
341 	ring = &adev->vce.ring[1];
342 
343 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
344 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
345 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
346 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
347 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
348 
349 	ring = &adev->vce.ring[2];
350 
351 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
352 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
353 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
354 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
355 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
356 
357 	vce_v4_0_mc_resume(adev);
358 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
359 			~VCE_STATUS__JOB_BUSY_MASK);
360 
361 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
362 
363 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
364 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
365 	mdelay(100);
366 
367 	r = vce_v4_0_firmware_loaded(adev);
368 
369 	/* clear BUSY flag */
370 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
371 
372 	if (r) {
373 		DRM_ERROR("VCE not responding, giving up!!!\n");
374 		return r;
375 	}
376 
377 	return 0;
378 }
379 
380 static int vce_v4_0_stop(struct amdgpu_device *adev)
381 {
382 
383 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
384 
385 	/* hold on ECPU */
386 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
387 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
388 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
389 
390 	/* clear BUSY flag */
391 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
392 
393 	/* Set Clock-Gating off */
394 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
395 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
396 	*/
397 
398 	return 0;
399 }
400 
401 static int vce_v4_0_early_init(void *handle)
402 {
403 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
404 
405 	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
406 		adev->vce.num_rings = 1;
407 	else
408 		adev->vce.num_rings = 3;
409 
410 	vce_v4_0_set_ring_funcs(adev);
411 	vce_v4_0_set_irq_funcs(adev);
412 
413 	return 0;
414 }
415 
416 static int vce_v4_0_sw_init(void *handle)
417 {
418 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
419 	struct amdgpu_ring *ring;
420 	unsigned size;
421 	int r, i;
422 
423 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
424 	if (r)
425 		return r;
426 
427 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
428 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
429 		size += VCE_V4_0_FW_SIZE;
430 
431 	r = amdgpu_vce_sw_init(adev, size);
432 	if (r)
433 		return r;
434 
435 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
436 		const struct common_firmware_header *hdr;
437 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
438 
439 		adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
440 		if (!adev->vce.saved_bo)
441 			return -ENOMEM;
442 
443 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
444 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
445 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
446 		adev->firmware.fw_size +=
447 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
448 		DRM_INFO("PSP loading VCE firmware\n");
449 	} else {
450 		r = amdgpu_vce_resume(adev);
451 		if (r)
452 			return r;
453 	}
454 
455 	for (i = 0; i < adev->vce.num_rings; i++) {
456 		ring = &adev->vce.ring[i];
457 		sprintf(ring->name, "vce%d", i);
458 		if (amdgpu_sriov_vf(adev)) {
459 			/* DOORBELL only works under SRIOV */
460 			ring->use_doorbell = true;
461 
462 			/* currently only use the first encoding ring for sriov,
463 			 * so set unused location for other unused rings.
464 			 */
465 			if (i == 0)
466 				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
467 			else
468 				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
469 		}
470 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
471 		if (r)
472 			return r;
473 	}
474 
475 	r = amdgpu_virt_alloc_mm_table(adev);
476 	if (r)
477 		return r;
478 
479 	return r;
480 }
481 
482 static int vce_v4_0_sw_fini(void *handle)
483 {
484 	int r;
485 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
486 
487 	/* free MM table */
488 	amdgpu_virt_free_mm_table(adev);
489 
490 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
491 		kfree(adev->vce.saved_bo);
492 		adev->vce.saved_bo = NULL;
493 	}
494 
495 	r = amdgpu_vce_suspend(adev);
496 	if (r)
497 		return r;
498 
499 	return amdgpu_vce_sw_fini(adev);
500 }
501 
502 static int vce_v4_0_hw_init(void *handle)
503 {
504 	int r, i;
505 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
506 
507 	if (amdgpu_sriov_vf(adev))
508 		r = vce_v4_0_sriov_start(adev);
509 	else
510 		r = vce_v4_0_start(adev);
511 	if (r)
512 		return r;
513 
514 	for (i = 0; i < adev->vce.num_rings; i++)
515 		adev->vce.ring[i].ready = false;
516 
517 	for (i = 0; i < adev->vce.num_rings; i++) {
518 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
519 		if (r)
520 			return r;
521 		else
522 			adev->vce.ring[i].ready = true;
523 	}
524 
525 	DRM_INFO("VCE initialized successfully.\n");
526 
527 	return 0;
528 }
529 
530 static int vce_v4_0_hw_fini(void *handle)
531 {
532 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
533 	int i;
534 
535 	if (!amdgpu_sriov_vf(adev)) {
536 		/* vce_v4_0_wait_for_idle(handle); */
537 		vce_v4_0_stop(adev);
538 	} else {
539 		/* full access mode, so don't touch any VCE register */
540 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
541 	}
542 
543 	for (i = 0; i < adev->vce.num_rings; i++)
544 		adev->vce.ring[i].ready = false;
545 
546 	return 0;
547 }
548 
549 static int vce_v4_0_suspend(void *handle)
550 {
551 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
552 	int r;
553 
554 	if (adev->vce.vcpu_bo == NULL)
555 		return 0;
556 
557 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
558 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
559 		void *ptr = adev->vce.cpu_addr;
560 
561 		memcpy_fromio(adev->vce.saved_bo, ptr, size);
562 	}
563 
564 	r = vce_v4_0_hw_fini(adev);
565 	if (r)
566 		return r;
567 
568 	return amdgpu_vce_suspend(adev);
569 }
570 
571 static int vce_v4_0_resume(void *handle)
572 {
573 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
574 	int r;
575 
576 	if (adev->vce.vcpu_bo == NULL)
577 		return -EINVAL;
578 
579 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
580 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
581 		void *ptr = adev->vce.cpu_addr;
582 
583 		memcpy_toio(ptr, adev->vce.saved_bo, size);
584 	} else {
585 		r = amdgpu_vce_resume(adev);
586 		if (r)
587 			return r;
588 	}
589 
590 	return vce_v4_0_hw_init(adev);
591 }
592 
593 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
594 {
595 	uint32_t offset, size;
596 
597 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
598 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
599 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
600 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
601 
602 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
603 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
604 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
605 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
606 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
607 
608 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
609 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
610 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
611 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
612 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
613 	} else {
614 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
615 			(adev->vce.gpu_addr >> 8));
616 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
617 			(adev->vce.gpu_addr >> 40) & 0xff);
618 	}
619 
620 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
621 	size = VCE_V4_0_FW_SIZE;
622 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
623 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
624 
625 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
626 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
627 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
628 	size = VCE_V4_0_STACK_SIZE;
629 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
630 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
631 
632 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
633 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
634 	offset += size;
635 	size = VCE_V4_0_DATA_SIZE;
636 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
637 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
638 
639 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
640 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
641 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
642 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
643 }
644 
645 static int vce_v4_0_set_clockgating_state(void *handle,
646 					  enum amd_clockgating_state state)
647 {
648 	/* needed for driver unload*/
649 	return 0;
650 }
651 
652 #if 0
653 static bool vce_v4_0_is_idle(void *handle)
654 {
655 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
656 	u32 mask = 0;
657 
658 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
659 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
660 
661 	return !(RREG32(mmSRBM_STATUS2) & mask);
662 }
663 
664 static int vce_v4_0_wait_for_idle(void *handle)
665 {
666 	unsigned i;
667 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
668 
669 	for (i = 0; i < adev->usec_timeout; i++)
670 		if (vce_v4_0_is_idle(handle))
671 			return 0;
672 
673 	return -ETIMEDOUT;
674 }
675 
/*
 * Individual busy bits of VCE_STATUS (bits 3, 4 and 5) and the combined
 * mask (AUTO_BUSY | RB0_BUSY) that vce_v4_0_check_soft_reset() tests.
 * NOTE(review): the bit table in vce_v4_0_check_soft_reset() labels
 * bit 3 as RB2 rather than AUTO_BUSY - confirm which is correct.
 */
#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
681 
682 static bool vce_v4_0_check_soft_reset(void *handle)
683 {
684 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
685 	u32 srbm_soft_reset = 0;
686 
687 	/* According to VCE team , we should use VCE_STATUS instead
688 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
689 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
690 	 * instance's registers are accessed
691 	 * (0 for 1st instance, 10 for 2nd instance).
692 	 *
693 	 *VCE_STATUS
694 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
695 	 *|----+----+-----------+----+----+----+----------+---------+----|
696 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
697 	 *
698 	 * VCE team suggest use bit 3--bit 6 for busy status check
699 	 */
700 	mutex_lock(&adev->grbm_idx_mutex);
701 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
702 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
703 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
704 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
705 	}
706 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
707 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
708 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
709 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
710 	}
711 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
712 	mutex_unlock(&adev->grbm_idx_mutex);
713 
714 	if (srbm_soft_reset) {
715 		adev->vce.srbm_soft_reset = srbm_soft_reset;
716 		return true;
717 	} else {
718 		adev->vce.srbm_soft_reset = 0;
719 		return false;
720 	}
721 }
722 
723 static int vce_v4_0_soft_reset(void *handle)
724 {
725 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
726 	u32 srbm_soft_reset;
727 
728 	if (!adev->vce.srbm_soft_reset)
729 		return 0;
730 	srbm_soft_reset = adev->vce.srbm_soft_reset;
731 
732 	if (srbm_soft_reset) {
733 		u32 tmp;
734 
735 		tmp = RREG32(mmSRBM_SOFT_RESET);
736 		tmp |= srbm_soft_reset;
737 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
738 		WREG32(mmSRBM_SOFT_RESET, tmp);
739 		tmp = RREG32(mmSRBM_SOFT_RESET);
740 
741 		udelay(50);
742 
743 		tmp &= ~srbm_soft_reset;
744 		WREG32(mmSRBM_SOFT_RESET, tmp);
745 		tmp = RREG32(mmSRBM_SOFT_RESET);
746 
747 		/* Wait a little for things to settle down */
748 		udelay(50);
749 	}
750 
751 	return 0;
752 }
753 
754 static int vce_v4_0_pre_soft_reset(void *handle)
755 {
756 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
757 
758 	if (!adev->vce.srbm_soft_reset)
759 		return 0;
760 
761 	mdelay(5);
762 
763 	return vce_v4_0_suspend(adev);
764 }
765 
766 
767 static int vce_v4_0_post_soft_reset(void *handle)
768 {
769 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
770 
771 	if (!adev->vce.srbm_soft_reset)
772 		return 0;
773 
774 	mdelay(5);
775 
776 	return vce_v4_0_resume(adev);
777 }
778 
779 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
780 {
781 	u32 tmp, data;
782 
783 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
784 	if (override)
785 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
786 	else
787 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
788 
789 	if (tmp != data)
790 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
791 }
792 
793 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
794 					     bool gated)
795 {
796 	u32 data;
797 
798 	/* Set Override to disable Clock Gating */
799 	vce_v4_0_override_vce_clock_gating(adev, true);
800 
801 	/* This function enables MGCG which is controlled by firmware.
802 	   With the clocks in the gated state the core is still
803 	   accessible but the firmware will throttle the clocks on the
804 	   fly as necessary.
805 	*/
806 	if (gated) {
807 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
808 		data |= 0x1ff;
809 		data &= ~0xef0000;
810 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
811 
812 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
813 		data |= 0x3ff000;
814 		data &= ~0xffc00000;
815 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
816 
817 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
818 		data |= 0x2;
819 		data &= ~0x00010000;
820 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
821 
822 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
823 		data |= 0x37f;
824 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
825 
826 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
827 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
828 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
829 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
830 			0x8;
831 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
832 	} else {
833 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
834 		data &= ~0x80010;
835 		data |= 0xe70008;
836 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
837 
838 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
839 		data |= 0xffc00000;
840 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
841 
842 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
843 		data |= 0x10000;
844 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
845 
846 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
847 		data &= ~0xffc00000;
848 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
849 
850 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
851 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
852 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
853 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
854 			  0x8);
855 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
856 	}
857 	vce_v4_0_override_vce_clock_gating(adev, false);
858 }
859 
860 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
861 {
862 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
863 
864 	if (enable)
865 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
866 	else
867 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
868 
869 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
870 }
871 
872 static int vce_v4_0_set_clockgating_state(void *handle,
873 					  enum amd_clockgating_state state)
874 {
875 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
876 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
877 	int i;
878 
879 	if ((adev->asic_type == CHIP_POLARIS10) ||
880 		(adev->asic_type == CHIP_TONGA) ||
881 		(adev->asic_type == CHIP_FIJI))
882 		vce_v4_0_set_bypass_mode(adev, enable);
883 
884 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
885 		return 0;
886 
887 	mutex_lock(&adev->grbm_idx_mutex);
888 	for (i = 0; i < 2; i++) {
889 		/* Program VCE Instance 0 or 1 if not harvested */
890 		if (adev->vce.harvest_config & (1 << i))
891 			continue;
892 
893 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
894 
895 		if (enable) {
896 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
897 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
898 			data &= ~(0xf | 0xff0);
899 			data |= ((0x0 << 0) | (0x04 << 4));
900 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
901 
902 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
903 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
904 			data &= ~(0xf | 0xff0);
905 			data |= ((0x0 << 0) | (0x04 << 4));
906 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
907 		}
908 
909 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
910 	}
911 
912 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
913 	mutex_unlock(&adev->grbm_idx_mutex);
914 
915 	return 0;
916 }
917 
918 static int vce_v4_0_set_powergating_state(void *handle,
919 					  enum amd_powergating_state state)
920 {
921 	/* This doesn't actually powergate the VCE block.
922 	 * That's done in the dpm code via the SMC.  This
923 	 * just re-inits the block as necessary.  The actual
924 	 * gating still happens in the dpm code.  We should
925 	 * revisit this when there is a cleaner line between
926 	 * the smc and the hw blocks
927 	 */
928 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
929 
930 	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
931 		return 0;
932 
933 	if (state == AMD_PG_STATE_GATE)
934 		/* XXX do we need a vce_v4_0_stop()? */
935 		return 0;
936 	else
937 		return vce_v4_0_start(adev);
938 }
939 #endif
940 
941 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
942 		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
943 {
944 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
945 	amdgpu_ring_write(ring, vmid);
946 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
947 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
948 	amdgpu_ring_write(ring, ib->length_dw);
949 }
950 
951 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
952 			u64 seq, unsigned flags)
953 {
954 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
955 
956 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
957 	amdgpu_ring_write(ring, addr);
958 	amdgpu_ring_write(ring, upper_32_bits(addr));
959 	amdgpu_ring_write(ring, seq);
960 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
961 }
962 
963 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
964 {
965 	amdgpu_ring_write(ring, VCE_CMD_END);
966 }
967 
968 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
969 				   uint32_t val, uint32_t mask)
970 {
971 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
972 	amdgpu_ring_write(ring,	reg << 2);
973 	amdgpu_ring_write(ring, mask);
974 	amdgpu_ring_write(ring, val);
975 }
976 
977 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
978 				   unsigned int vmid, uint64_t pd_addr)
979 {
980 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
981 
982 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
983 
984 	/* wait for reg writes */
985 	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
986 			       lower_32_bits(pd_addr), 0xffffffff);
987 }
988 
989 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
990 			       uint32_t reg, uint32_t val)
991 {
992 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
993 	amdgpu_ring_write(ring,	reg << 2);
994 	amdgpu_ring_write(ring, val);
995 }
996 
997 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
998 					struct amdgpu_irq_src *source,
999 					unsigned type,
1000 					enum amdgpu_interrupt_state state)
1001 {
1002 	uint32_t val = 0;
1003 
1004 	if (!amdgpu_sriov_vf(adev)) {
1005 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1006 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1007 
1008 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1009 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1010 	}
1011 	return 0;
1012 }
1013 
1014 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1015 				      struct amdgpu_irq_src *source,
1016 				      struct amdgpu_iv_entry *entry)
1017 {
1018 	DRM_DEBUG("IH: VCE\n");
1019 
1020 	switch (entry->src_data[0]) {
1021 	case 0:
1022 	case 1:
1023 	case 2:
1024 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1025 		break;
1026 	default:
1027 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1028 			  entry->src_id, entry->src_data[0]);
1029 		break;
1030 	}
1031 
1032 	return 0;
1033 }
1034 
1035 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1036 	.name = "vce_v4_0",
1037 	.early_init = vce_v4_0_early_init,
1038 	.late_init = NULL,
1039 	.sw_init = vce_v4_0_sw_init,
1040 	.sw_fini = vce_v4_0_sw_fini,
1041 	.hw_init = vce_v4_0_hw_init,
1042 	.hw_fini = vce_v4_0_hw_fini,
1043 	.suspend = vce_v4_0_suspend,
1044 	.resume = vce_v4_0_resume,
1045 	.is_idle = NULL /* vce_v4_0_is_idle */,
1046 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1047 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1048 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1049 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1050 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1051 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1052 	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1053 };
1054 
1055 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1056 	.type = AMDGPU_RING_TYPE_VCE,
1057 	.align_mask = 0x3f,
1058 	.nop = VCE_CMD_NO_OP,
1059 	.support_64bit_ptrs = false,
1060 	.vmhub = AMDGPU_MMHUB,
1061 	.get_rptr = vce_v4_0_ring_get_rptr,
1062 	.get_wptr = vce_v4_0_ring_get_wptr,
1063 	.set_wptr = vce_v4_0_ring_set_wptr,
1064 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1065 	.emit_frame_size =
1066 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1067 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1068 		4 + /* vce_v4_0_emit_vm_flush */
1069 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1070 		1, /* vce_v4_0_ring_insert_end */
1071 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1072 	.emit_ib = vce_v4_0_ring_emit_ib,
1073 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1074 	.emit_fence = vce_v4_0_ring_emit_fence,
1075 	.test_ring = amdgpu_vce_ring_test_ring,
1076 	.test_ib = amdgpu_vce_ring_test_ib,
1077 	.insert_nop = amdgpu_ring_insert_nop,
1078 	.insert_end = vce_v4_0_ring_insert_end,
1079 	.pad_ib = amdgpu_ring_generic_pad_ib,
1080 	.begin_use = amdgpu_vce_ring_begin_use,
1081 	.end_use = amdgpu_vce_ring_end_use,
1082 	.emit_wreg = vce_v4_0_emit_wreg,
1083 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
1084 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1085 };
1086 
1087 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1088 {
1089 	int i;
1090 
1091 	for (i = 0; i < adev->vce.num_rings; i++) {
1092 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1093 		adev->vce.ring[i].me = i;
1094 	}
1095 	DRM_INFO("VCE enabled in VM mode\n");
1096 }
1097 
1098 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1099 	.set = vce_v4_0_set_interrupt_state,
1100 	.process = vce_v4_0_process_interrupt,
1101 };
1102 
1103 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1104 {
1105 	adev->vce.irq.num_types = 1;
1106 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1107 };
1108 
1109 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1110 {
1111 	.type = AMD_IP_BLOCK_TYPE_VCE,
1112 	.major = 4,
1113 	.minor = 0,
1114 	.rev = 0,
1115 	.funcs = &vce_v4_0_ip_funcs,
1116 };
1117