xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision 4a075bd4)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15.h"
32 #include "soc15d.h"
33 #include "soc15_common.h"
34 #include "mmsch_v1_0.h"
35 
36 #include "vce/vce_4_0_offset.h"
37 #include "vce/vce_4_0_default.h"
38 #include "vce/vce_4_0_sh_mask.h"
39 #include "mmhub/mmhub_1_0_offset.h"
40 #include "mmhub/mmhub_1_0_sh_mask.h"
41 
42 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
43 
44 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
45 
46 #define VCE_V4_0_FW_SIZE	(384 * 1024)
47 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
48 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
49 
50 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
51 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
52 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
53 
54 /**
55  * vce_v4_0_ring_get_rptr - get read pointer
56  *
57  * @ring: amdgpu_ring pointer
58  *
59  * Returns the current hardware read pointer
60  */
61 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
62 {
63 	struct amdgpu_device *adev = ring->adev;
64 
65 	if (ring->me == 0)
66 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
67 	else if (ring->me == 1)
68 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
69 	else
70 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
71 }
72 
73 /**
74  * vce_v4_0_ring_get_wptr - get write pointer
75  *
76  * @ring: amdgpu_ring pointer
77  *
78  * Returns the current hardware write pointer
79  */
80 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
81 {
82 	struct amdgpu_device *adev = ring->adev;
83 
84 	if (ring->use_doorbell)
85 		return adev->wb.wb[ring->wptr_offs];
86 
87 	if (ring->me == 0)
88 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
89 	else if (ring->me == 1)
90 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
91 	else
92 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
93 }
94 
95 /**
96  * vce_v4_0_ring_set_wptr - set write pointer
97  *
98  * @ring: amdgpu_ring pointer
99  *
100  * Commits the write pointer to the hardware
101  */
102 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
103 {
104 	struct amdgpu_device *adev = ring->adev;
105 
106 	if (ring->use_doorbell) {
107 		/* XXX check if swapping is necessary on BE */
108 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
109 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
110 		return;
111 	}
112 
113 	if (ring->me == 0)
114 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
115 			lower_32_bits(ring->wptr));
116 	else if (ring->me == 1)
117 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
118 			lower_32_bits(ring->wptr));
119 	else
120 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
121 			lower_32_bits(ring->wptr));
122 }
123 
124 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
125 {
126 	int i, j;
127 
128 	for (i = 0; i < 10; ++i) {
129 		for (j = 0; j < 100; ++j) {
130 			uint32_t status =
131 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
132 
133 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
134 				return 0;
135 			mdelay(10);
136 		}
137 
138 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
139 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
140 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
141 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
142 		mdelay(10);
143 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
144 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
145 		mdelay(10);
146 
147 	}
148 
149 	return -ETIMEDOUT;
150 }
151 
152 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
153 				struct amdgpu_mm_table *table)
154 {
155 	uint32_t data = 0, loop;
156 	uint64_t addr = table->gpu_addr;
157 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
158 	uint32_t size;
159 
160 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
161 
162 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
163 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
164 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
165 
166 	/* 2, update vmid of descriptor */
167 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
168 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
169 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
170 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
171 
172 	/* 3, notify mmsch about the size of this descriptor */
173 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
174 
175 	/* 4, set resp to zero */
176 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
177 
178 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
179 	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
180 	adev->vce.ring[0].wptr = 0;
181 	adev->vce.ring[0].wptr_old = 0;
182 
183 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
184 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
185 
186 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
187 	loop = 1000;
188 	while ((data & 0x10000002) != 0x10000002) {
189 		udelay(10);
190 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
191 		loop--;
192 		if (!loop)
193 			break;
194 	}
195 
196 	if (!loop) {
197 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
198 		return -EBUSY;
199 	}
200 
201 	return 0;
202 }
203 
204 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
205 {
206 	struct amdgpu_ring *ring;
207 	uint32_t offset, size;
208 	uint32_t table_size = 0;
209 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
210 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
211 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
212 	struct mmsch_v1_0_cmd_end end = { { 0 } };
213 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
214 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
215 
216 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
217 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
218 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
219 	end.cmd_header.command_type = MMSCH_COMMAND__END;
220 
221 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
222 		header->version = MMSCH_VERSION;
223 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
224 
225 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
226 			header->vce_table_offset = header->header_size;
227 		else
228 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
229 
230 		init_table += header->vce_table_offset;
231 
232 		ring = &adev->vce.ring[0];
233 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
234 					    lower_32_bits(ring->gpu_addr));
235 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
236 					    upper_32_bits(ring->gpu_addr));
237 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
238 					    ring->ring_size / 4);
239 
240 		/* BEGING OF MC_RESUME */
241 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
242 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
243 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
244 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
245 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
246 
247 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
248 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
249 			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
250 			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
251 			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
252 
253 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
254 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
255 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
256 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
257 						(tmr_mc_addr >> 40) & 0xff);
258 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
259 		} else {
260 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
261 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
262 						adev->vce.gpu_addr >> 8);
263 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
264 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
265 						(adev->vce.gpu_addr >> 40) & 0xff);
266 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
267 						offset & ~0x0f000000);
268 
269 		}
270 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
271 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
272 						adev->vce.gpu_addr >> 8);
273 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
274 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
275 						(adev->vce.gpu_addr >> 40) & 0xff);
276 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
277 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
278 						adev->vce.gpu_addr >> 8);
279 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
280 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
281 						(adev->vce.gpu_addr >> 40) & 0xff);
282 
283 		size = VCE_V4_0_FW_SIZE;
284 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
285 
286 		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
287 		size = VCE_V4_0_STACK_SIZE;
288 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
289 					(offset & ~0x0f000000) | (1 << 24));
290 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
291 
292 		offset += size;
293 		size = VCE_V4_0_DATA_SIZE;
294 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
295 					(offset & ~0x0f000000) | (2 << 24));
296 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
297 
298 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
299 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
300 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
301 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
302 
303 		/* end of MC_RESUME */
304 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
305 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
306 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
307 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
308 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
309 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
310 
311 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
312 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
313 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
314 
315 		/* clear BUSY flag */
316 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
317 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
318 
319 		/* add end packet */
320 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
321 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
322 		header->vce_table_size = table_size;
323 	}
324 
325 	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
326 }
327 
328 /**
329  * vce_v4_0_start - start VCE block
330  *
331  * @adev: amdgpu_device pointer
332  *
333  * Setup and start the VCE block
334  */
335 static int vce_v4_0_start(struct amdgpu_device *adev)
336 {
337 	struct amdgpu_ring *ring;
338 	int r;
339 
340 	ring = &adev->vce.ring[0];
341 
342 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
343 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
344 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
345 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
346 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
347 
348 	ring = &adev->vce.ring[1];
349 
350 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
351 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
352 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
353 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
354 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
355 
356 	ring = &adev->vce.ring[2];
357 
358 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
359 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
360 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
361 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
362 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
363 
364 	vce_v4_0_mc_resume(adev);
365 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
366 			~VCE_STATUS__JOB_BUSY_MASK);
367 
368 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
369 
370 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
371 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
372 	mdelay(100);
373 
374 	r = vce_v4_0_firmware_loaded(adev);
375 
376 	/* clear BUSY flag */
377 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
378 
379 	if (r) {
380 		DRM_ERROR("VCE not responding, giving up!!!\n");
381 		return r;
382 	}
383 
384 	return 0;
385 }
386 
387 static int vce_v4_0_stop(struct amdgpu_device *adev)
388 {
389 
390 	/* Disable VCPU */
391 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
392 
393 	/* hold on ECPU */
394 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
395 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
396 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
397 
398 	/* clear VCE_STATUS */
399 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
400 
401 	/* Set Clock-Gating off */
402 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
403 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
404 	*/
405 
406 	return 0;
407 }
408 
409 static int vce_v4_0_early_init(void *handle)
410 {
411 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
412 
413 	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
414 		adev->vce.num_rings = 1;
415 	else
416 		adev->vce.num_rings = 3;
417 
418 	vce_v4_0_set_ring_funcs(adev);
419 	vce_v4_0_set_irq_funcs(adev);
420 
421 	return 0;
422 }
423 
424 static int vce_v4_0_sw_init(void *handle)
425 {
426 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
427 	struct amdgpu_ring *ring;
428 
429 	unsigned size;
430 	int r, i;
431 
432 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
433 	if (r)
434 		return r;
435 
436 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
437 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
438 		size += VCE_V4_0_FW_SIZE;
439 
440 	r = amdgpu_vce_sw_init(adev, size);
441 	if (r)
442 		return r;
443 
444 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
445 		const struct common_firmware_header *hdr;
446 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
447 
448 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
449 		if (!adev->vce.saved_bo)
450 			return -ENOMEM;
451 
452 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
453 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
454 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
455 		adev->firmware.fw_size +=
456 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
457 		DRM_INFO("PSP loading VCE firmware\n");
458 	} else {
459 		r = amdgpu_vce_resume(adev);
460 		if (r)
461 			return r;
462 	}
463 
464 	for (i = 0; i < adev->vce.num_rings; i++) {
465 		ring = &adev->vce.ring[i];
466 		sprintf(ring->name, "vce%d", i);
467 		if (amdgpu_sriov_vf(adev)) {
468 			/* DOORBELL only works under SRIOV */
469 			ring->use_doorbell = true;
470 
471 			/* currently only use the first encoding ring for sriov,
472 			 * so set unused location for other unused rings.
473 			 */
474 			if (i == 0)
475 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
476 			else
477 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
478 		}
479 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
480 		if (r)
481 			return r;
482 	}
483 
484 
485 	r = amdgpu_vce_entity_init(adev);
486 	if (r)
487 		return r;
488 
489 	r = amdgpu_virt_alloc_mm_table(adev);
490 	if (r)
491 		return r;
492 
493 	return r;
494 }
495 
496 static int vce_v4_0_sw_fini(void *handle)
497 {
498 	int r;
499 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
500 
501 	/* free MM table */
502 	amdgpu_virt_free_mm_table(adev);
503 
504 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
505 		kvfree(adev->vce.saved_bo);
506 		adev->vce.saved_bo = NULL;
507 	}
508 
509 	r = amdgpu_vce_suspend(adev);
510 	if (r)
511 		return r;
512 
513 	return amdgpu_vce_sw_fini(adev);
514 }
515 
516 static int vce_v4_0_hw_init(void *handle)
517 {
518 	int r, i;
519 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
520 
521 	if (amdgpu_sriov_vf(adev))
522 		r = vce_v4_0_sriov_start(adev);
523 	else
524 		r = vce_v4_0_start(adev);
525 	if (r)
526 		return r;
527 
528 	for (i = 0; i < adev->vce.num_rings; i++) {
529 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
530 		if (r)
531 			return r;
532 	}
533 
534 	DRM_INFO("VCE initialized successfully.\n");
535 
536 	return 0;
537 }
538 
539 static int vce_v4_0_hw_fini(void *handle)
540 {
541 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
542 	int i;
543 
544 	if (!amdgpu_sriov_vf(adev)) {
545 		/* vce_v4_0_wait_for_idle(handle); */
546 		vce_v4_0_stop(adev);
547 	} else {
548 		/* full access mode, so don't touch any VCE register */
549 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
550 	}
551 
552 	for (i = 0; i < adev->vce.num_rings; i++)
553 		adev->vce.ring[i].sched.ready = false;
554 
555 	return 0;
556 }
557 
558 static int vce_v4_0_suspend(void *handle)
559 {
560 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
561 	int r;
562 
563 	if (adev->vce.vcpu_bo == NULL)
564 		return 0;
565 
566 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
567 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
568 		void *ptr = adev->vce.cpu_addr;
569 
570 		memcpy_fromio(adev->vce.saved_bo, ptr, size);
571 	}
572 
573 	r = vce_v4_0_hw_fini(adev);
574 	if (r)
575 		return r;
576 
577 	return amdgpu_vce_suspend(adev);
578 }
579 
580 static int vce_v4_0_resume(void *handle)
581 {
582 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
583 	int r;
584 
585 	if (adev->vce.vcpu_bo == NULL)
586 		return -EINVAL;
587 
588 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
589 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
590 		void *ptr = adev->vce.cpu_addr;
591 
592 		memcpy_toio(ptr, adev->vce.saved_bo, size);
593 	} else {
594 		r = amdgpu_vce_resume(adev);
595 		if (r)
596 			return r;
597 	}
598 
599 	return vce_v4_0_hw_init(adev);
600 }
601 
602 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
603 {
604 	uint32_t offset, size;
605 	uint64_t tmr_mc_addr;
606 
607 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
608 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
609 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
610 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
611 
612 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
613 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
614 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
615 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
616 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
617 
618 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
619 
620 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
621 		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
622 										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
623 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
624 			(tmr_mc_addr >> 8));
625 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
626 			(tmr_mc_addr >> 40) & 0xff);
627 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
628 	} else {
629 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
630 			(adev->vce.gpu_addr >> 8));
631 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
632 			(adev->vce.gpu_addr >> 40) & 0xff);
633 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
634 	}
635 
636 	size = VCE_V4_0_FW_SIZE;
637 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
638 
639 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
640 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
641 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
642 	size = VCE_V4_0_STACK_SIZE;
643 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
644 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
645 
646 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
647 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
648 	offset += size;
649 	size = VCE_V4_0_DATA_SIZE;
650 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
651 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
652 
653 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
654 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
655 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
656 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
657 }
658 
659 static int vce_v4_0_set_clockgating_state(void *handle,
660 					  enum amd_clockgating_state state)
661 {
662 	/* needed for driver unload*/
663 	return 0;
664 }
665 
666 #if 0
667 static bool vce_v4_0_is_idle(void *handle)
668 {
669 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
670 	u32 mask = 0;
671 
672 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
673 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
674 
675 	return !(RREG32(mmSRBM_STATUS2) & mask);
676 }
677 
678 static int vce_v4_0_wait_for_idle(void *handle)
679 {
680 	unsigned i;
681 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
682 
683 	for (i = 0; i < adev->usec_timeout; i++)
684 		if (vce_v4_0_is_idle(handle))
685 			return 0;
686 
687 	return -ETIMEDOUT;
688 }
689 
690 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
691 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
692 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
693 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
694 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
695 
696 static bool vce_v4_0_check_soft_reset(void *handle)
697 {
698 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
699 	u32 srbm_soft_reset = 0;
700 
701 	/* According to VCE team , we should use VCE_STATUS instead
702 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
703 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
704 	 * instance's registers are accessed
705 	 * (0 for 1st instance, 10 for 2nd instance).
706 	 *
707 	 *VCE_STATUS
708 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
709 	 *|----+----+-----------+----+----+----+----------+---------+----|
710 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
711 	 *
712 	 * VCE team suggest use bit 3--bit 6 for busy status check
713 	 */
714 	mutex_lock(&adev->grbm_idx_mutex);
715 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
716 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
717 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
718 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
719 	}
720 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
721 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
722 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
723 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
724 	}
725 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
726 	mutex_unlock(&adev->grbm_idx_mutex);
727 
728 	if (srbm_soft_reset) {
729 		adev->vce.srbm_soft_reset = srbm_soft_reset;
730 		return true;
731 	} else {
732 		adev->vce.srbm_soft_reset = 0;
733 		return false;
734 	}
735 }
736 
737 static int vce_v4_0_soft_reset(void *handle)
738 {
739 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
740 	u32 srbm_soft_reset;
741 
742 	if (!adev->vce.srbm_soft_reset)
743 		return 0;
744 	srbm_soft_reset = adev->vce.srbm_soft_reset;
745 
746 	if (srbm_soft_reset) {
747 		u32 tmp;
748 
749 		tmp = RREG32(mmSRBM_SOFT_RESET);
750 		tmp |= srbm_soft_reset;
751 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
752 		WREG32(mmSRBM_SOFT_RESET, tmp);
753 		tmp = RREG32(mmSRBM_SOFT_RESET);
754 
755 		udelay(50);
756 
757 		tmp &= ~srbm_soft_reset;
758 		WREG32(mmSRBM_SOFT_RESET, tmp);
759 		tmp = RREG32(mmSRBM_SOFT_RESET);
760 
761 		/* Wait a little for things to settle down */
762 		udelay(50);
763 	}
764 
765 	return 0;
766 }
767 
768 static int vce_v4_0_pre_soft_reset(void *handle)
769 {
770 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
771 
772 	if (!adev->vce.srbm_soft_reset)
773 		return 0;
774 
775 	mdelay(5);
776 
777 	return vce_v4_0_suspend(adev);
778 }
779 
780 
781 static int vce_v4_0_post_soft_reset(void *handle)
782 {
783 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
784 
785 	if (!adev->vce.srbm_soft_reset)
786 		return 0;
787 
788 	mdelay(5);
789 
790 	return vce_v4_0_resume(adev);
791 }
792 
793 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
794 {
795 	u32 tmp, data;
796 
797 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
798 	if (override)
799 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
800 	else
801 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
802 
803 	if (tmp != data)
804 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
805 }
806 
807 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
808 					     bool gated)
809 {
810 	u32 data;
811 
812 	/* Set Override to disable Clock Gating */
813 	vce_v4_0_override_vce_clock_gating(adev, true);
814 
815 	/* This function enables MGCG which is controlled by firmware.
816 	   With the clocks in the gated state the core is still
817 	   accessible but the firmware will throttle the clocks on the
818 	   fly as necessary.
819 	*/
820 	if (gated) {
821 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
822 		data |= 0x1ff;
823 		data &= ~0xef0000;
824 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
825 
826 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
827 		data |= 0x3ff000;
828 		data &= ~0xffc00000;
829 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
830 
831 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
832 		data |= 0x2;
833 		data &= ~0x00010000;
834 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
835 
836 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
837 		data |= 0x37f;
838 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
839 
840 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
841 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
842 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
843 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
844 			0x8;
845 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
846 	} else {
847 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
848 		data &= ~0x80010;
849 		data |= 0xe70008;
850 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
851 
852 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
853 		data |= 0xffc00000;
854 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
855 
856 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
857 		data |= 0x10000;
858 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
859 
860 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
861 		data &= ~0xffc00000;
862 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
863 
864 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
865 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
866 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
867 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
868 			  0x8);
869 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
870 	}
871 	vce_v4_0_override_vce_clock_gating(adev, false);
872 }
873 
874 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
875 {
876 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
877 
878 	if (enable)
879 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
880 	else
881 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
882 
883 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
884 }
885 
886 static int vce_v4_0_set_clockgating_state(void *handle,
887 					  enum amd_clockgating_state state)
888 {
889 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
890 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
891 	int i;
892 
893 	if ((adev->asic_type == CHIP_POLARIS10) ||
894 		(adev->asic_type == CHIP_TONGA) ||
895 		(adev->asic_type == CHIP_FIJI))
896 		vce_v4_0_set_bypass_mode(adev, enable);
897 
898 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
899 		return 0;
900 
901 	mutex_lock(&adev->grbm_idx_mutex);
902 	for (i = 0; i < 2; i++) {
903 		/* Program VCE Instance 0 or 1 if not harvested */
904 		if (adev->vce.harvest_config & (1 << i))
905 			continue;
906 
907 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
908 
909 		if (enable) {
910 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
911 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
912 			data &= ~(0xf | 0xff0);
913 			data |= ((0x0 << 0) | (0x04 << 4));
914 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
915 
916 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
917 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
918 			data &= ~(0xf | 0xff0);
919 			data |= ((0x0 << 0) | (0x04 << 4));
920 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
921 		}
922 
923 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
924 	}
925 
926 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
927 	mutex_unlock(&adev->grbm_idx_mutex);
928 
929 	return 0;
930 }
931 #endif
932 
933 static int vce_v4_0_set_powergating_state(void *handle,
934 					  enum amd_powergating_state state)
935 {
936 	/* This doesn't actually powergate the VCE block.
937 	 * That's done in the dpm code via the SMC.  This
938 	 * just re-inits the block as necessary.  The actual
939 	 * gating still happens in the dpm code.  We should
940 	 * revisit this when there is a cleaner line between
941 	 * the smc and the hw blocks
942 	 */
943 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
944 
945 	if (state == AMD_PG_STATE_GATE)
946 		return vce_v4_0_stop(adev);
947 	else
948 		return vce_v4_0_start(adev);
949 }
950 
951 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
952 					struct amdgpu_ib *ib, uint32_t flags)
953 {
954 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
955 
956 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
957 	amdgpu_ring_write(ring, vmid);
958 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
959 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
960 	amdgpu_ring_write(ring, ib->length_dw);
961 }
962 
963 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
964 			u64 seq, unsigned flags)
965 {
966 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
967 
968 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
969 	amdgpu_ring_write(ring, addr);
970 	amdgpu_ring_write(ring, upper_32_bits(addr));
971 	amdgpu_ring_write(ring, seq);
972 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
973 }
974 
975 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
976 {
977 	amdgpu_ring_write(ring, VCE_CMD_END);
978 }
979 
980 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
981 				   uint32_t val, uint32_t mask)
982 {
983 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
984 	amdgpu_ring_write(ring,	reg << 2);
985 	amdgpu_ring_write(ring, mask);
986 	amdgpu_ring_write(ring, val);
987 }
988 
989 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
990 				   unsigned int vmid, uint64_t pd_addr)
991 {
992 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
993 
994 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
995 
996 	/* wait for reg writes */
997 	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
998 			       lower_32_bits(pd_addr), 0xffffffff);
999 }
1000 
1001 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1002 			       uint32_t reg, uint32_t val)
1003 {
1004 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1005 	amdgpu_ring_write(ring,	reg << 2);
1006 	amdgpu_ring_write(ring, val);
1007 }
1008 
1009 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1010 					struct amdgpu_irq_src *source,
1011 					unsigned type,
1012 					enum amdgpu_interrupt_state state)
1013 {
1014 	uint32_t val = 0;
1015 
1016 	if (!amdgpu_sriov_vf(adev)) {
1017 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1018 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1019 
1020 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1021 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1022 	}
1023 	return 0;
1024 }
1025 
1026 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1027 				      struct amdgpu_irq_src *source,
1028 				      struct amdgpu_iv_entry *entry)
1029 {
1030 	DRM_DEBUG("IH: VCE\n");
1031 
1032 	switch (entry->src_data[0]) {
1033 	case 0:
1034 	case 1:
1035 	case 2:
1036 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1037 		break;
1038 	default:
1039 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1040 			  entry->src_id, entry->src_data[0]);
1041 		break;
1042 	}
1043 
1044 	return 0;
1045 }
1046 
1047 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1048 	.name = "vce_v4_0",
1049 	.early_init = vce_v4_0_early_init,
1050 	.late_init = NULL,
1051 	.sw_init = vce_v4_0_sw_init,
1052 	.sw_fini = vce_v4_0_sw_fini,
1053 	.hw_init = vce_v4_0_hw_init,
1054 	.hw_fini = vce_v4_0_hw_fini,
1055 	.suspend = vce_v4_0_suspend,
1056 	.resume = vce_v4_0_resume,
1057 	.is_idle = NULL /* vce_v4_0_is_idle */,
1058 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1059 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1060 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1061 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1062 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1063 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1064 	.set_powergating_state = vce_v4_0_set_powergating_state,
1065 };
1066 
1067 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1068 	.type = AMDGPU_RING_TYPE_VCE,
1069 	.align_mask = 0x3f,
1070 	.nop = VCE_CMD_NO_OP,
1071 	.support_64bit_ptrs = false,
1072 	.vmhub = AMDGPU_MMHUB,
1073 	.get_rptr = vce_v4_0_ring_get_rptr,
1074 	.get_wptr = vce_v4_0_ring_get_wptr,
1075 	.set_wptr = vce_v4_0_ring_set_wptr,
1076 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1077 	.emit_frame_size =
1078 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1079 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1080 		4 + /* vce_v4_0_emit_vm_flush */
1081 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1082 		1, /* vce_v4_0_ring_insert_end */
1083 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1084 	.emit_ib = vce_v4_0_ring_emit_ib,
1085 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1086 	.emit_fence = vce_v4_0_ring_emit_fence,
1087 	.test_ring = amdgpu_vce_ring_test_ring,
1088 	.test_ib = amdgpu_vce_ring_test_ib,
1089 	.insert_nop = amdgpu_ring_insert_nop,
1090 	.insert_end = vce_v4_0_ring_insert_end,
1091 	.pad_ib = amdgpu_ring_generic_pad_ib,
1092 	.begin_use = amdgpu_vce_ring_begin_use,
1093 	.end_use = amdgpu_vce_ring_end_use,
1094 	.emit_wreg = vce_v4_0_emit_wreg,
1095 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
1096 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1097 };
1098 
1099 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1100 {
1101 	int i;
1102 
1103 	for (i = 0; i < adev->vce.num_rings; i++) {
1104 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1105 		adev->vce.ring[i].me = i;
1106 	}
1107 	DRM_INFO("VCE enabled in VM mode\n");
1108 }
1109 
1110 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1111 	.set = vce_v4_0_set_interrupt_state,
1112 	.process = vce_v4_0_process_interrupt,
1113 };
1114 
1115 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1116 {
1117 	adev->vce.irq.num_types = 1;
1118 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1119 };
1120 
1121 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1122 {
1123 	.type = AMD_IP_BLOCK_TYPE_VCE,
1124 	.major = 4,
1125 	.minor = 0,
1126 	.rev = 0,
1127 	.funcs = &vce_v4_0_ip_funcs,
1128 };
1129