1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15d.h"
32 #include "soc15_common.h"
33 #include "mmsch_v1_0.h"
34 
35 #include "vega10/soc15ip.h"
36 #include "vega10/VCE/vce_4_0_offset.h"
37 #include "vega10/VCE/vce_4_0_default.h"
38 #include "vega10/VCE/vce_4_0_sh_mask.h"
39 #include "vega10/MMHUB/mmhub_1_0_offset.h"
40 #include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
41 
42 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
43 
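/* Sizes of the regions laid out in the VCE VCPU buffer object: the firmware
 * image, the VCPU stack, and a per-handle data area (plus a shared remainder,
 * going by the AMDGPU_MAX_VCE_HANDLES term).  They are programmed into the
 * VCPU cache offset/size registers in vce_v4_0_mc_resume() and into the
 * SR-IOV init table.
 */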
44 #define VCE_V4_0_FW_SIZE	(384 * 1024)
45 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
46 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
47 
48 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
49 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
50 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
51 
52 /**
53  * vce_v4_0_ring_get_rptr - get read pointer
54  *
55  * @ring: amdgpu_ring pointer
56  *
57  * Returns the current hardware read pointer
58  */
59 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
60 {
61 	struct amdgpu_device *adev = ring->adev;
62 
63 	if (ring == &adev->vce.ring[0])
64 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
65 	else if (ring == &adev->vce.ring[1])
66 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
67 	else
68 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
69 }
70 
71 /**
72  * vce_v4_0_ring_get_wptr - get write pointer
73  *
74  * @ring: amdgpu_ring pointer
75  *
76  * Returns the current hardware write pointer
77  */
78 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
79 {
80 	struct amdgpu_device *adev = ring->adev;
81 
82 	if (ring->use_doorbell)
83 		return adev->wb.wb[ring->wptr_offs];
84 
85 	if (ring == &adev->vce.ring[0])
86 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
87 	else if (ring == &adev->vce.ring[1])
88 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
89 	else
90 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
91 }
92 
93 /**
94  * vce_v4_0_ring_set_wptr - set write pointer
95  *
96  * @ring: amdgpu_ring pointer
97  *
98  * Commits the write pointer to the hardware
99  */
100 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
101 {
102 	struct amdgpu_device *adev = ring->adev;
103 
104 	if (ring->use_doorbell) {
105 		/* XXX check if swapping is necessary on BE */
106 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
107 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
108 		return;
109 	}
110 
111 	if (ring == &adev->vce.ring[0])
112 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
113 			lower_32_bits(ring->wptr));
114 	else if (ring == &adev->vce.ring[1])
115 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
116 			lower_32_bits(ring->wptr));
117 	else
118 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
119 			lower_32_bits(ring->wptr));
120 }
121 
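/* Poll VCE_STATUS until the VCPU firmware reports that it has loaded.  Each
 * of the 10 outer attempts waits up to 100 * 10 ms and, on failure, pulses
 * the ECPU soft reset before retrying; -ETIMEDOUT is returned if the firmware
 * never comes up.
 */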
122 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
123 {
124 	int i, j;
125 
126 	for (i = 0; i < 10; ++i) {
127 		for (j = 0; j < 100; ++j) {
128 			uint32_t status =
129 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
130 
131 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
132 				return 0;
133 			mdelay(10);
134 		}
135 
136 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
137 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
138 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
139 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
140 		mdelay(10);
141 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
142 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 		mdelay(10);
144 
145 	}
146 
147 	return -ETIMEDOUT;
148 }
149 
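/* Hand the prepared init table over to the MMSCH firmware: point it at the
 * descriptor in GPU memory, tell it the descriptor VMID and size, then ring
 * its mailbox and wait for the response register to signal completion.
 */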
150 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
151 				struct amdgpu_mm_table *table)
152 {
153 	uint32_t data = 0, loop;
154 	uint64_t addr = table->gpu_addr;
155 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
156 	uint32_t size;
157 
158 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
159 
160 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
161 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
162 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
163 
164 	/* 2, update vmid of descriptor */
165 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
166 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
167 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
168 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
169 
170 	/* 3, notify mmsch about the size of this descriptor */
171 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
172 
173 	/* 4, set resp to zero */
174 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
175 
176 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
177 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
178 
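	/* 0x10000001 requests initialization; the MMSCH is expected to reply by
	 * setting bit 1 on top of that (0x10000002), which is what the loop
	 * below polls for (interpretation based on the mask used here).
	 */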
179 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
180 	loop = 1000;
181 	while ((data & 0x10000002) != 0x10000002) {
182 		udelay(10);
183 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
184 		loop--;
185 		if (!loop)
186 			break;
187 	}
188 
189 	if (!loop) {
190 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
191 		return -EBUSY;
192 	}
193 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
194 
195 	return 0;
196 }
197 
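/* Under SR-IOV the VF must not program VCE registers directly.  Instead this
 * builds a table of direct-write / read-modify-write / poll packets covering
 * ring setup, MC_RESUME and the firmware-loaded handshake, and asks the MMSCH
 * to replay it via vce_v4_0_mmsch_start().
 */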
198 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
199 {
200 	struct amdgpu_ring *ring;
201 	uint32_t offset, size;
202 	uint32_t table_size = 0;
203 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
204 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
205 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
206 	struct mmsch_v1_0_cmd_end end = { { 0 } };
207 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
208 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
209 
210 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
211 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
212 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
213 	end.cmd_header.command_type = MMSCH_COMMAND__END;
214 
215 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
216 		header->version = MMSCH_VERSION;
217 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
218 
219 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
220 			header->vce_table_offset = header->header_size;
221 		else
222 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
223 
224 		init_table += header->vce_table_offset;
225 
226 		ring = &adev->vce.ring[0];
227 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
228 					    lower_32_bits(ring->gpu_addr));
229 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
230 					    upper_32_bits(ring->gpu_addr));
231 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
232 					    ring->ring_size / 4);
233 
234 		/* begin of MC_RESUME */
235 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
236 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
237 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
238 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
239 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
240 
241 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
242 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
243 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
244 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
245 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
246 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
247 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
248 		} else {
249 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
250 						adev->vce.gpu_addr >> 8);
251 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
252 						adev->vce.gpu_addr >> 8);
253 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
254 						adev->vce.gpu_addr >> 8);
255 		}
256 
257 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
258 		size = VCE_V4_0_FW_SIZE;
259 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
260 					    offset & 0x7FFFFFFF);
261 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
262 
263 		offset += size;
264 		size = VCE_V4_0_STACK_SIZE;
265 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
266 					    offset & 0x7FFFFFFF);
267 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
268 
269 		offset += size;
270 		size = VCE_V4_0_DATA_SIZE;
271 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
272 					    offset & 0x7FFFFFFF);
273 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
274 
275 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
276 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
277 						   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
278 
279 		/* end of MC_RESUME */
280 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
281 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
282 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
283 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
284 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
285 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
286 
287 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
288 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
289 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
290 
291 		/* clear BUSY flag */
292 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
293 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
294 
295 		/* add end packet */
296 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
297 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
298 		header->vce_table_size = table_size;
299 
300 		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
301 	}
302 
303 	return -EINVAL; /* already initialized? */
304 }
305 
306 /**
307  * vce_v4_0_start - start VCE block
308  *
309  * @adev: amdgpu_device pointer
310  *
311  * Setup and start the VCE block
312  */
313 static int vce_v4_0_start(struct amdgpu_device *adev)
314 {
315 	struct amdgpu_ring *ring;
316 	int r;
317 
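	/* Program all three ring buffers, restore the memory controller view of
	 * the VCPU BO, then release the ECPU from soft reset and wait for the
	 * firmware to report that it has loaded.
	 */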
318 	ring = &adev->vce.ring[0];
319 
320 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
321 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
322 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
323 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
324 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
325 
326 	ring = &adev->vce.ring[1];
327 
328 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
329 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
330 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
331 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
332 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
333 
334 	ring = &adev->vce.ring[2];
335 
336 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
337 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
338 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
339 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
340 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
341 
342 	vce_v4_0_mc_resume(adev);
343 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
344 			~VCE_STATUS__JOB_BUSY_MASK);
345 
346 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
347 
348 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
349 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
350 	mdelay(100);
351 
352 	r = vce_v4_0_firmware_loaded(adev);
353 
354 	/* clear BUSY flag */
355 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
356 
357 	if (r) {
358 		DRM_ERROR("VCE not responding, giving up!!!\n");
359 		return r;
360 	}
361 
362 	return 0;
363 }
364 
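/* Roughly the reverse of vce_v4_0_start(): stop the VCPU clock, hold the ECPU
 * in soft reset and clear the busy flag.
 */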
365 static int vce_v4_0_stop(struct amdgpu_device *adev)
366 {
367 
368 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
369 
370 	/* hold on ECPU */
371 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
372 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
373 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
374 
375 	/* clear BUSY flag */
376 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
377 
378 	/* Set Clock-Gating off */
379 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
380 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
381 	*/
382 
383 	return 0;
384 }
385 
386 static int vce_v4_0_early_init(void *handle)
387 {
388 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
389 
390 	if (amdgpu_sriov_vf(adev)) /* currently only VCE ring 0 supports SR-IOV */
391 		adev->vce.num_rings = 1;
392 	else
393 		adev->vce.num_rings = 3;
394 
395 	vce_v4_0_set_ring_funcs(adev);
396 	vce_v4_0_set_irq_funcs(adev);
397 
398 	return 0;
399 }
400 
401 static int vce_v4_0_sw_init(void *handle)
402 {
403 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
404 	struct amdgpu_ring *ring;
405 	unsigned size;
406 	int r, i;
407 
408 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
409 	if (r)
410 		return r;
411 
412 	size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
413 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
414 		size += VCE_V4_0_FW_SIZE;
415 
416 	r = amdgpu_vce_sw_init(adev, size);
417 	if (r)
418 		return r;
419 
420 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
421 		const struct common_firmware_header *hdr;
422 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
423 
424 		adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
425 		if (!adev->vce.saved_bo)
426 			return -ENOMEM;
427 
428 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
429 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
430 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
431 		adev->firmware.fw_size +=
432 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
433 		DRM_INFO("PSP loading VCE firmware\n");
434 	} else {
435 		r = amdgpu_vce_resume(adev);
436 		if (r)
437 			return r;
438 	}
439 
440 	for (i = 0; i < adev->vce.num_rings; i++) {
441 		ring = &adev->vce.ring[i];
442 		sprintf(ring->name, "vce%d", i);
443 		if (amdgpu_sriov_vf(adev)) {
444 			/* DOORBELL only works under SRIOV */
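			/* Each ring gets its own doorbell; the DOORBELL64 slot
			 * index appears to be converted to a 32-bit doorbell
			 * offset, which is presumably why it is multiplied by 2
			 * here (assumption based on the naming).
			 */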
445 			ring->use_doorbell = true;
446 			if (i == 0)
447 				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
448 			else if (i == 1)
449 				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
450 			else
451 				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
452 		}
453 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
454 		if (r)
455 			return r;
456 	}
457 
458 	r = amdgpu_virt_alloc_mm_table(adev);
459 	if (r)
460 		return r;
461 
462 	return r;
463 }
464 
465 static int vce_v4_0_sw_fini(void *handle)
466 {
467 	int r;
468 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
469 
470 	/* free MM table */
471 	amdgpu_virt_free_mm_table(adev);
472 
473 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
474 		kfree(adev->vce.saved_bo);
475 		adev->vce.saved_bo = NULL;
476 	}
477 
478 	r = amdgpu_vce_suspend(adev);
479 	if (r)
480 		return r;
481 
482 	return amdgpu_vce_sw_fini(adev);
483 }
484 
485 static int vce_v4_0_hw_init(void *handle)
486 {
487 	int r, i;
488 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
489 
490 	if (amdgpu_sriov_vf(adev))
491 		r = vce_v4_0_sriov_start(adev);
492 	else
493 		r = vce_v4_0_start(adev);
494 	if (r)
495 		return r;
496 
497 	for (i = 0; i < adev->vce.num_rings; i++)
498 		adev->vce.ring[i].ready = false;
499 
500 	for (i = 0; i < adev->vce.num_rings; i++) {
501 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
502 		if (r)
503 			return r;
504 		else
505 			adev->vce.ring[i].ready = true;
506 	}
507 
508 	DRM_INFO("VCE initialized successfully.\n");
509 
510 	return 0;
511 }
512 
513 static int vce_v4_0_hw_fini(void *handle)
514 {
515 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
516 	int i;
517 
518 	if (!amdgpu_sriov_vf(adev)) {
519 		/* vce_v4_0_wait_for_idle(handle); */
520 		vce_v4_0_stop(adev);
521 	} else {
522 		/* full access mode, so don't touch any VCE register */
523 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
524 	}
525 
526 	for (i = 0; i < adev->vce.num_rings; i++)
527 		adev->vce.ring[i].ready = false;
528 
529 	return 0;
530 }
531 
532 static int vce_v4_0_suspend(void *handle)
533 {
534 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
535 	int r;
536 
537 	if (adev->vce.vcpu_bo == NULL)
538 		return 0;
539 
540 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
541 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
542 		void *ptr = adev->vce.cpu_addr;
543 
544 		memcpy_fromio(adev->vce.saved_bo, ptr, size);
545 	}
546 
547 	r = vce_v4_0_hw_fini(adev);
548 	if (r)
549 		return r;
550 
551 	return amdgpu_vce_suspend(adev);
552 }
553 
554 static int vce_v4_0_resume(void *handle)
555 {
556 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
557 	int r;
558 
559 	if (adev->vce.vcpu_bo == NULL)
560 		return -EINVAL;
561 
562 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
563 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
564 		void *ptr = adev->vce.cpu_addr;
565 
566 		memcpy_toio(ptr, adev->vce.saved_bo, size);
567 	} else {
568 		r = amdgpu_vce_resume(adev);
569 		if (r)
570 			return r;
571 	}
572 
573 	return vce_v4_0_hw_init(adev);
574 }
575 
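/* Restore the memory controller programming for the VCPU: clock-gating
 * defaults, LMI setup, and the 40-bit/64-bit base addresses plus the offsets
 * and sizes of the firmware, stack and data regions inside the VCPU BO.  With
 * PSP firmware loading, the firmware base comes from the PSP-managed ucode
 * address instead of adev->vce.gpu_addr.
 */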
576 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
577 {
578 	uint32_t offset, size;
579 
580 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
581 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
582 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
583 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
584 
585 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
586 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
587 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
588 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
589 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
590 
591 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
592 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
593 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
594 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
595 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
596 	} else {
597 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
598 			(adev->vce.gpu_addr >> 8));
599 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
600 			(adev->vce.gpu_addr >> 40) & 0xff);
601 	}
602 
603 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
604 	size = VCE_V4_0_FW_SIZE;
605 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
606 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
607 
608 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
609 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
610 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
611 	size = VCE_V4_0_STACK_SIZE;
612 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
613 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
614 
615 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
616 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
617 	offset += size;
618 	size = VCE_V4_0_DATA_SIZE;
619 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
620 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
621 
622 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
623 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
624 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
625 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
626 }
627 
628 static int vce_v4_0_set_clockgating_state(void *handle,
629 					  enum amd_clockgating_state state)
630 {
631 	/* needed for driver unload */
632 	return 0;
633 }
634 
635 #if 0
636 static bool vce_v4_0_is_idle(void *handle)
637 {
638 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
639 	u32 mask = 0;
640 
641 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
642 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
643 
644 	return !(RREG32(mmSRBM_STATUS2) & mask);
645 }
646 
647 static int vce_v4_0_wait_for_idle(void *handle)
648 {
649 	unsigned i;
650 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
651 
652 	for (i = 0; i < adev->usec_timeout; i++)
653 		if (vce_v4_0_is_idle(handle))
654 			return 0;
655 
656 	return -ETIMEDOUT;
657 }
658 
659 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
660 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
661 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
662 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
663 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
664 
665 static bool vce_v4_0_check_soft_reset(void *handle)
666 {
667 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
668 	u32 srbm_soft_reset = 0;
669 
670 	/* According to the VCE team, we should use VCE_STATUS instead of the
671 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
672 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
673 	 * instance's registers are accessed
674 	 * (0 for the 1st instance, 0x10 for the 2nd instance).
675 	 *
676 	 * VCE_STATUS
677 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
678 	 * |----+----+-----------+----+----+----+----------+---------+----|
679 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
680 	 *
681 	 * The VCE team suggests using bits 3-6 for the busy status check.
682 	 */
683 	mutex_lock(&adev->grbm_idx_mutex);
684 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
685 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
686 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
687 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
688 	}
689 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
690 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
691 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
692 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
693 	}
694 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
695 	mutex_unlock(&adev->grbm_idx_mutex);
696 
697 	if (srbm_soft_reset) {
698 		adev->vce.srbm_soft_reset = srbm_soft_reset;
699 		return true;
700 	} else {
701 		adev->vce.srbm_soft_reset = 0;
702 		return false;
703 	}
704 }
705 
706 static int vce_v4_0_soft_reset(void *handle)
707 {
708 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
709 	u32 srbm_soft_reset;
710 
711 	if (!adev->vce.srbm_soft_reset)
712 		return 0;
713 	srbm_soft_reset = adev->vce.srbm_soft_reset;
714 
715 	if (srbm_soft_reset) {
716 		u32 tmp;
717 
718 		tmp = RREG32(mmSRBM_SOFT_RESET);
719 		tmp |= srbm_soft_reset;
720 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
721 		WREG32(mmSRBM_SOFT_RESET, tmp);
722 		tmp = RREG32(mmSRBM_SOFT_RESET);
723 
724 		udelay(50);
725 
726 		tmp &= ~srbm_soft_reset;
727 		WREG32(mmSRBM_SOFT_RESET, tmp);
728 		tmp = RREG32(mmSRBM_SOFT_RESET);
729 
730 		/* Wait a little for things to settle down */
731 		udelay(50);
732 	}
733 
734 	return 0;
735 }
736 
737 static int vce_v4_0_pre_soft_reset(void *handle)
738 {
739 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
740 
741 	if (!adev->vce.srbm_soft_reset)
742 		return 0;
743 
744 	mdelay(5);
745 
746 	return vce_v4_0_suspend(adev);
747 }
748 
749 
750 static int vce_v4_0_post_soft_reset(void *handle)
751 {
752 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
753 
754 	if (!adev->vce.srbm_soft_reset)
755 		return 0;
756 
757 	mdelay(5);
758 
759 	return vce_v4_0_resume(adev);
760 }
761 
762 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
763 {
764 	u32 tmp, data;
765 
766 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
767 	if (override)
768 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
769 	else
770 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
771 
772 	if (tmp != data)
773 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
774 }
775 
776 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
777 					     bool gated)
778 {
779 	u32 data;
780 
781 	/* Set Override to disable Clock Gating */
782 	vce_v4_0_override_vce_clock_gating(adev, true);
783 
784 	/* This function enables MGCG which is controlled by firmware.
785 	 * With the clocks in the gated state the core is still
786 	 * accessible but the firmware will throttle the clocks on the
787 	 * fly as necessary.
788 	 */
789 	if (gated) {
790 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
791 		data |= 0x1ff;
792 		data &= ~0xef0000;
793 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
794 
795 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
796 		data |= 0x3ff000;
797 		data &= ~0xffc00000;
798 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
799 
800 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
801 		data |= 0x2;
802 		data &= ~0x00010000;
803 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
804 
805 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
806 		data |= 0x37f;
807 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
808 
809 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
810 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
811 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
812 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
813 			0x8;
814 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
815 	} else {
816 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
817 		data &= ~0x80010;
818 		data |= 0xe70008;
819 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
820 
821 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
822 		data |= 0xffc00000;
823 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
824 
825 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
826 		data |= 0x10000;
827 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
828 
829 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
830 		data &= ~0xffc00000;
831 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
832 
833 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
834 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
835 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
836 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
837 			  0x8);
838 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
839 	}
840 	vce_v4_0_override_vce_clock_gating(adev, false);
841 }
842 
843 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
844 {
845 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
846 
847 	if (enable)
848 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
849 	else
850 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
851 
852 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
853 }
854 
855 static int vce_v4_0_set_clockgating_state(void *handle,
856 					  enum amd_clockgating_state state)
857 {
858 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
859 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
860 	int i;
861 
862 	if ((adev->asic_type == CHIP_POLARIS10) ||
863 		(adev->asic_type == CHIP_TONGA) ||
864 		(adev->asic_type == CHIP_FIJI))
865 		vce_v4_0_set_bypass_mode(adev, enable);
866 
867 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
868 		return 0;
869 
870 	mutex_lock(&adev->grbm_idx_mutex);
871 	for (i = 0; i < 2; i++) {
872 		/* Program VCE Instance 0 or 1 if not harvested */
873 		if (adev->vce.harvest_config & (1 << i))
874 			continue;
875 
876 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
877 
878 		if (enable) {
879 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
880 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
881 			data &= ~(0xf | 0xff0);
882 			data |= ((0x0 << 0) | (0x04 << 4));
883 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
884 
885 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
886 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
887 			data &= ~(0xf | 0xff0);
888 			data |= ((0x0 << 0) | (0x04 << 4));
889 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
890 		}
891 
892 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
893 	}
894 
895 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
896 	mutex_unlock(&adev->grbm_idx_mutex);
897 
898 	return 0;
899 }
900 
901 static int vce_v4_0_set_powergating_state(void *handle,
902 					  enum amd_powergating_state state)
903 {
904 	/* This doesn't actually powergate the VCE block.
905 	 * That's done in the dpm code via the SMC.  This
906 	 * just re-inits the block as necessary.  The actual
907 	 * gating still happens in the dpm code.  We should
908 	 * revisit this when there is a cleaner line between
909 	 * the smc and the hw blocks
910 	 */
911 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
912 
913 	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
914 		return 0;
915 
916 	if (state == AMD_PG_STATE_GATE)
917 		/* XXX do we need a vce_v4_0_stop()? */
918 		return 0;
919 	else
920 		return vce_v4_0_start(adev);
921 }
922 #endif
923 
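/* VCE_CMD_IB_VM packet layout: command, VMID, IB address (low/high) and the
 * IB length in dwords, five dwords total, matching .emit_ib_size below.
 */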
924 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
925 		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
926 {
927 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
928 	amdgpu_ring_write(ring, vm_id);
929 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
930 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
931 	amdgpu_ring_write(ring, ib->length_dw);
932 }
933 
934 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
935 			u64 seq, unsigned flags)
936 {
937 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
938 
939 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
940 	amdgpu_ring_write(ring, addr);
941 	amdgpu_ring_write(ring, upper_32_bits(addr));
942 	amdgpu_ring_write(ring, seq);
943 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
944 }
945 
946 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
947 {
948 	amdgpu_ring_write(ring, VCE_CMD_END);
949 }
950 
951 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
952 			 unsigned int vm_id, uint64_t pd_addr)
953 {
954 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
955 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
956 	unsigned eng = ring->vm_inv_eng;
957 
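	/* Update this VMID's page-directory base in the MMHUB ctx0 registers,
	 * wait for the low 32 bits to read back, then request a TLB flush on
	 * this ring's invalidation engine and wait for this VMID's ack bit.
	 */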
958 	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
959 	pd_addr |= AMDGPU_PTE_VALID;
960 
961 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
962 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
963 	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
964 
965 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
966 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
967 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
968 
969 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
970 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
971 	amdgpu_ring_write(ring, 0xffffffff);
972 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
973 
974 	/* flush TLB */
975 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
976 	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
977 	amdgpu_ring_write(ring, req);
978 
979 	/* wait for flush */
980 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
981 	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
982 	amdgpu_ring_write(ring, 1 << vm_id);
983 	amdgpu_ring_write(ring, 1 << vm_id);
984 }
985 
986 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
987 					struct amdgpu_irq_src *source,
988 					unsigned type,
989 					enum amdgpu_interrupt_state state)
990 {
991 	uint32_t val = 0;
992 
993 	if (state == AMDGPU_IRQ_STATE_ENABLE)
994 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
995 
996 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
997 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
998 	return 0;
999 }
1000 
1001 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1002 				      struct amdgpu_irq_src *source,
1003 				      struct amdgpu_iv_entry *entry)
1004 {
1005 	DRM_DEBUG("IH: VCE\n");
1006 
1007 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
1008 			VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
1009 			~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
1010 
1011 	switch (entry->src_data[0]) {
1012 	case 0:
1013 	case 1:
1014 	case 2:
1015 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1016 		break;
1017 	default:
1018 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1019 			  entry->src_id, entry->src_data[0]);
1020 		break;
1021 	}
1022 
1023 	return 0;
1024 }
1025 
1026 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1027 	.name = "vce_v4_0",
1028 	.early_init = vce_v4_0_early_init,
1029 	.late_init = NULL,
1030 	.sw_init = vce_v4_0_sw_init,
1031 	.sw_fini = vce_v4_0_sw_fini,
1032 	.hw_init = vce_v4_0_hw_init,
1033 	.hw_fini = vce_v4_0_hw_fini,
1034 	.suspend = vce_v4_0_suspend,
1035 	.resume = vce_v4_0_resume,
1036 	.is_idle = NULL /* vce_v4_0_is_idle */,
1037 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1038 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1039 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1040 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1041 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1042 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1043 	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1044 };
1045 
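/* The emit_frame_size / emit_ib_size entries below are dword budgets that
 * mirror the emit helpers above: 17 dwords for the VM flush, 5 + 5 for two
 * fences, 1 for the end packet, and 5 dwords per IB.
 */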
1046 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1047 	.type = AMDGPU_RING_TYPE_VCE,
1048 	.align_mask = 0x3f,
1049 	.nop = VCE_CMD_NO_OP,
1050 	.support_64bit_ptrs = false,
1051 	.vmhub = AMDGPU_MMHUB,
1052 	.get_rptr = vce_v4_0_ring_get_rptr,
1053 	.get_wptr = vce_v4_0_ring_get_wptr,
1054 	.set_wptr = vce_v4_0_ring_set_wptr,
1055 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1056 	.emit_frame_size =
1057 		17 + /* vce_v4_0_emit_vm_flush */
1058 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1059 		1, /* vce_v4_0_ring_insert_end */
1060 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1061 	.emit_ib = vce_v4_0_ring_emit_ib,
1062 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1063 	.emit_fence = vce_v4_0_ring_emit_fence,
1064 	.test_ring = amdgpu_vce_ring_test_ring,
1065 	.test_ib = amdgpu_vce_ring_test_ib,
1066 	.insert_nop = amdgpu_ring_insert_nop,
1067 	.insert_end = vce_v4_0_ring_insert_end,
1068 	.pad_ib = amdgpu_ring_generic_pad_ib,
1069 	.begin_use = amdgpu_vce_ring_begin_use,
1070 	.end_use = amdgpu_vce_ring_end_use,
1071 };
1072 
1073 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1074 {
1075 	int i;
1076 
1077 	for (i = 0; i < adev->vce.num_rings; i++)
1078 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1079 	DRM_INFO("VCE enabled in VM mode\n");
1080 }
1081 
1082 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1083 	.set = vce_v4_0_set_interrupt_state,
1084 	.process = vce_v4_0_process_interrupt,
1085 };
1086 
1087 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1088 {
1089 	adev->vce.irq.num_types = 1;
1090 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1091 }
1092 
1093 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1094 {
1095 	.type = AMD_IP_BLOCK_TYPE_VCE,
1096 	.major = 4,
1097 	.minor = 0,
1098 	.rev = 0,
1099 	.funcs = &vce_v4_0_ip_funcs,
1100 };
1101