xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision 74ce1896)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15d.h"
32 #include "soc15_common.h"
33 #include "mmsch_v1_0.h"
34 
35 #include "vega10/soc15ip.h"
36 #include "vega10/VCE/vce_4_0_offset.h"
37 #include "vega10/VCE/vce_4_0_default.h"
38 #include "vega10/VCE/vce_4_0_sh_mask.h"
39 #include "vega10/MMHUB/mmhub_1_0_offset.h"
40 #include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
41 
42 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
43 
44 #define VCE_V4_0_FW_SIZE	(384 * 1024)
45 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
46 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
47 
48 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
49 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
50 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
51 
52 /**
53  * vce_v4_0_ring_get_rptr - get read pointer
54  *
55  * @ring: amdgpu_ring pointer
56  *
57  * Returns the current hardware read pointer
58  */
59 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
60 {
61 	struct amdgpu_device *adev = ring->adev;
62 
63 	if (ring == &adev->vce.ring[0])
64 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
65 	else if (ring == &adev->vce.ring[1])
66 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
67 	else
68 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
69 }
70 
71 /**
72  * vce_v4_0_ring_get_wptr - get write pointer
73  *
74  * @ring: amdgpu_ring pointer
75  *
76  * Returns the current hardware write pointer
77  */
78 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
79 {
80 	struct amdgpu_device *adev = ring->adev;
81 
82 	if (ring->use_doorbell)
83 		return adev->wb.wb[ring->wptr_offs];
84 
85 	if (ring == &adev->vce.ring[0])
86 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
87 	else if (ring == &adev->vce.ring[1])
88 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
89 	else
90 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
91 }
92 
93 /**
94  * vce_v4_0_ring_set_wptr - set write pointer
95  *
96  * @ring: amdgpu_ring pointer
97  *
98  * Commits the write pointer to the hardware
99  */
100 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
101 {
102 	struct amdgpu_device *adev = ring->adev;
103 
104 	if (ring->use_doorbell) {
105 		/* XXX check if swapping is necessary on BE */
106 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
107 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
108 		return;
109 	}
110 
111 	if (ring == &adev->vce.ring[0])
112 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
113 			lower_32_bits(ring->wptr));
114 	else if (ring == &adev->vce.ring[1])
115 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
116 			lower_32_bits(ring->wptr));
117 	else
118 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
119 			lower_32_bits(ring->wptr));
120 }
121 
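/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to report ready
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the FW_LOADED bit, toggling the ECPU soft reset
 * between retry rounds.  Returns 0 once the firmware reports it is
 * loaded, or -ETIMEDOUT if it never does.
 */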
122 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
123 {
124 	int i, j;
125 
126 	for (i = 0; i < 10; ++i) {
127 		for (j = 0; j < 100; ++j) {
128 			uint32_t status =
129 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
130 
131 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
132 				return 0;
133 			mdelay(10);
134 		}
135 
136 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
137 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
138 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
139 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
140 		mdelay(10);
141 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
142 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 		mdelay(10);
144 
145 	}
146 
147 	return -ETIMEDOUT;
148 }
149 
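/**
 * vce_v4_0_mmsch_start - hand the init table to the MM scheduler
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table holding the MMSCH init header
 *
 * Program the MMSCH VF registers with the table address, VMID and size,
 * kick off initialization through the mailbox and wait for the response.
 * Returns 0 on success or -EBUSY if the MMSCH does not respond.
 */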
150 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
151 				struct amdgpu_mm_table *table)
152 {
153 	uint32_t data = 0, loop;
154 	uint64_t addr = table->gpu_addr;
155 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
156 	uint32_t size;
157 
158 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
159 
160 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
161 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
162 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
163 
164 	/* 2, update vmid of descriptor */
165 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
166 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
167 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
168 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
169 
170 	/* 3, notify mmsch about the size of this descriptor */
171 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
172 
173 	/* 4, set resp to zero */
174 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
175 
176 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
177 	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
178 	adev->vce.ring[0].wptr = 0;
179 	adev->vce.ring[0].wptr_old = 0;
180 
181 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
182 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
183 
184 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
185 	loop = 1000;
186 	while ((data & 0x10000002) != 0x10000002) {
187 		udelay(10);
188 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
189 		loop--;
190 		if (!loop)
191 			break;
192 	}
193 
194 	if (!loop) {
195 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
196 		return -EBUSY;
197 	}
198 
199 	return 0;
200 }
201 
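/**
 * vce_v4_0_sriov_start - start VCE under SRIOV
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH init table (ring registers, MC_RESUME programming and
 * the end packet) if it has not been filled in yet, then let the MM
 * scheduler apply it via vce_v4_0_mmsch_start().
 */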
202 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
203 {
204 	struct amdgpu_ring *ring;
205 	uint32_t offset, size;
206 	uint32_t table_size = 0;
207 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
208 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
209 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
210 	struct mmsch_v1_0_cmd_end end = { { 0 } };
211 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
212 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
213 
214 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
215 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
216 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
217 	end.cmd_header.command_type = MMSCH_COMMAND__END;
218 
219 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
220 		header->version = MMSCH_VERSION;
221 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
222 
223 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
224 			header->vce_table_offset = header->header_size;
225 		else
226 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
227 
228 		init_table += header->vce_table_offset;
229 
230 		ring = &adev->vce.ring[0];
231 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
232 					    lower_32_bits(ring->gpu_addr));
233 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
234 					    upper_32_bits(ring->gpu_addr));
235 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
236 					    ring->ring_size / 4);
237 
238 		/* BEGIN OF MC_RESUME */
239 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
240 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
241 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
242 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
243 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
244 
245 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
246 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
247 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
248 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
249 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
250 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
251 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
252 		} else {
253 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
254 						adev->vce.gpu_addr >> 8);
255 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
256 						adev->vce.gpu_addr >> 8);
257 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
258 						adev->vce.gpu_addr >> 8);
259 		}
260 
261 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
262 		size = VCE_V4_0_FW_SIZE;
263 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
264 					    offset & 0x7FFFFFFF);
265 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
266 
267 		offset += size;
268 		size = VCE_V4_0_STACK_SIZE;
269 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
270 					    offset & 0x7FFFFFFF);
271 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
272 
273 		offset += size;
274 		size = VCE_V4_0_DATA_SIZE;
275 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
276 					    offset & 0x7FFFFFFF);
277 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
278 
279 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
280 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
281 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
282 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
283 
284 		/* end of MC_RESUME */
285 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
286 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
287 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
288 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
289 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
290 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
291 
292 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
293 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
294 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
295 
296 		/* clear BUSY flag */
297 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
298 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
299 
300 		/* add end packet */
301 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
302 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
303 		header->vce_table_size = table_size;
304 	}
305 
306 	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
307 }
308 
309 /**
310  * vce_v4_0_start - start VCE block
311  *
312  * @adev: amdgpu_device pointer
313  *
314  * Setup and start the VCE block
315  */
316 static int vce_v4_0_start(struct amdgpu_device *adev)
317 {
318 	struct amdgpu_ring *ring;
319 	int r;
320 
321 	ring = &adev->vce.ring[0];
322 
323 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
324 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
325 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
326 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
327 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
328 
329 	ring = &adev->vce.ring[1];
330 
331 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
332 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
333 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
334 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
335 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
336 
337 	ring = &adev->vce.ring[2];
338 
339 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
340 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
341 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
342 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
343 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
344 
345 	vce_v4_0_mc_resume(adev);
346 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
347 			~VCE_STATUS__JOB_BUSY_MASK);
348 
349 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
350 
351 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
352 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
353 	mdelay(100);
354 
355 	r = vce_v4_0_firmware_loaded(adev);
356 
357 	/* clear BUSY flag */
358 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
359 
360 	if (r) {
361 		DRM_ERROR("VCE not responding, giving up!!!\n");
362 		return r;
363 	}
364 
365 	return 0;
366 }
367 
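/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Clear the VCPU clock enable, hold the ECPU in soft reset and clear
 * the BUSY flag.
 */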
368 static int vce_v4_0_stop(struct amdgpu_device *adev)
369 {
370 
371 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
372 
373 	/* hold on ECPU */
374 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
375 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
376 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
377 
378 	/* clear BUSY flag */
379 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
380 
381 	/* Set Clock-Gating off */
382 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
383 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
384 	*/
385 
386 	return 0;
387 }
388 
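/**
 * vce_v4_0_early_init - set ring and irq callbacks
 *
 * @handle: amdgpu_device pointer
 *
 * Pick the number of rings (one under SRIOV, three otherwise) and
 * install the ring and interrupt function tables.
 */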
389 static int vce_v4_0_early_init(void *handle)
390 {
391 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
392 
393 	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
394 		adev->vce.num_rings = 1;
395 	else
396 		adev->vce.num_rings = 3;
397 
398 	vce_v4_0_set_ring_funcs(adev);
399 	vce_v4_0_set_irq_funcs(adev);
400 
401 	return 0;
402 }
403 
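/**
 * vce_v4_0_sw_init - software init for VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Register the interrupt source, allocate the firmware/stack/data BO,
 * set up PSP firmware loading when it is used, initialize the rings and
 * allocate the MM table needed for SRIOV.
 */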
404 static int vce_v4_0_sw_init(void *handle)
405 {
406 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
407 	struct amdgpu_ring *ring;
408 	unsigned size;
409 	int r, i;
410 
411 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
412 	if (r)
413 		return r;
414 
415 	size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
416 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
417 		size += VCE_V4_0_FW_SIZE;
418 
419 	r = amdgpu_vce_sw_init(adev, size);
420 	if (r)
421 		return r;
422 
423 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
424 		const struct common_firmware_header *hdr;
425 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
426 
427 		adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
428 		if (!adev->vce.saved_bo)
429 			return -ENOMEM;
430 
431 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
432 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
433 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
434 		adev->firmware.fw_size +=
435 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
436 		DRM_INFO("PSP loading VCE firmware\n");
437 	} else {
438 		r = amdgpu_vce_resume(adev);
439 		if (r)
440 			return r;
441 	}
442 
443 	for (i = 0; i < adev->vce.num_rings; i++) {
444 		ring = &adev->vce.ring[i];
445 		sprintf(ring->name, "vce%d", i);
446 		if (amdgpu_sriov_vf(adev)) {
447 			/* DOORBELL only works under SRIOV */
448 			ring->use_doorbell = true;
449 
450 			/* currently only the first encoding ring is used for sriov,
451 			 * so point the other rings at an unused doorbell location.
452 			 */
453 			if (i == 0)
454 				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
455 			else
456 				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
457 		}
458 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
459 		if (r)
460 			return r;
461 	}
462 
463 	r = amdgpu_virt_alloc_mm_table(adev);
464 	if (r)
465 		return r;
466 
467 	return r;
468 }
469 
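/**
 * vce_v4_0_sw_fini - software teardown for VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Free the MM table and the saved firmware copy, then tear down the
 * common VCE state.
 */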
470 static int vce_v4_0_sw_fini(void *handle)
471 {
472 	int r;
473 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
474 
475 	/* free MM table */
476 	amdgpu_virt_free_mm_table(adev);
477 
478 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
479 		kfree(adev->vce.saved_bo);
480 		adev->vce.saved_bo = NULL;
481 	}
482 
483 	r = amdgpu_vce_suspend(adev);
484 	if (r)
485 		return r;
486 
487 	return amdgpu_vce_sw_fini(adev);
488 }
489 
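/**
 * vce_v4_0_hw_init - start VCE and test the rings
 *
 * @handle: amdgpu_device pointer
 *
 * Start the block through the MMSCH path under SRIOV or directly
 * otherwise, then ring-test every enabled ring and mark it ready.
 */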
490 static int vce_v4_0_hw_init(void *handle)
491 {
492 	int r, i;
493 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
494 
495 	if (amdgpu_sriov_vf(adev))
496 		r = vce_v4_0_sriov_start(adev);
497 	else
498 		r = vce_v4_0_start(adev);
499 	if (r)
500 		return r;
501 
502 	for (i = 0; i < adev->vce.num_rings; i++)
503 		adev->vce.ring[i].ready = false;
504 
505 	for (i = 0; i < adev->vce.num_rings; i++) {
506 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
507 		if (r)
508 			return r;
509 		else
510 			adev->vce.ring[i].ready = true;
511 	}
512 
513 	DRM_INFO("VCE initialized successfully.\n");
514 
515 	return 0;
516 }
517 
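/**
 * vce_v4_0_hw_fini - stop VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the hardware (skipped under SRIOV, where the host owns the
 * registers) and mark all rings as not ready.
 */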
518 static int vce_v4_0_hw_fini(void *handle)
519 {
520 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
521 	int i;
522 
523 	if (!amdgpu_sriov_vf(adev)) {
524 		/* vce_v4_0_wait_for_idle(handle); */
525 		vce_v4_0_stop(adev);
526 	} else {
527 		/* full access mode, so don't touch any VCE register */
528 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
529 	}
530 
531 	for (i = 0; i < adev->vce.num_rings; i++)
532 		adev->vce.ring[i].ready = false;
533 
534 	return 0;
535 }
536 
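/**
 * vce_v4_0_suspend - suspend VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Save the firmware BO contents when PSP loading is used, stop the
 * hardware and suspend the common VCE state.
 */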
537 static int vce_v4_0_suspend(void *handle)
538 {
539 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
540 	int r;
541 
542 	if (adev->vce.vcpu_bo == NULL)
543 		return 0;
544 
545 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
546 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
547 		void *ptr = adev->vce.cpu_addr;
548 
549 		memcpy_fromio(adev->vce.saved_bo, ptr, size);
550 	}
551 
552 	r = vce_v4_0_hw_fini(adev);
553 	if (r)
554 		return r;
555 
556 	return amdgpu_vce_suspend(adev);
557 }
558 
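/**
 * vce_v4_0_resume - resume VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the saved firmware BO contents (PSP loading) or reload the
 * firmware image, then bring the hardware back up.
 */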
559 static int vce_v4_0_resume(void *handle)
560 {
561 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
562 	int r;
563 
564 	if (adev->vce.vcpu_bo == NULL)
565 		return -EINVAL;
566 
567 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
568 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
569 		void *ptr = adev->vce.cpu_addr;
570 
571 		memcpy_toio(ptr, adev->vce.saved_bo, size);
572 	} else {
573 		r = amdgpu_vce_resume(adev);
574 		if (r)
575 			return r;
576 	}
577 
578 	return vce_v4_0_hw_init(adev);
579 }
580 
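/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Set up clock gating and the LMI registers, point the VCPU cache BARs
 * at the firmware, and program the cache offsets and sizes for the
 * firmware, stack and data regions.
 */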
581 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
582 {
583 	uint32_t offset, size;
584 
585 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
586 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
587 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
588 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
589 
590 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
591 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
592 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
593 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
594 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
595 
596 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
597 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
598 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
599 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
600 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
601 	} else {
602 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
603 			(adev->vce.gpu_addr >> 8));
604 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
605 			(adev->vce.gpu_addr >> 40) & 0xff);
606 	}
607 
608 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
609 	size = VCE_V4_0_FW_SIZE;
610 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
611 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
612 
613 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
614 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
615 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
616 	size = VCE_V4_0_STACK_SIZE;
617 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
618 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
619 
620 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
621 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
622 	offset += size;
623 	size = VCE_V4_0_DATA_SIZE;
624 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
625 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
626 
627 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
628 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
629 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
630 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
631 }
632 
633 static int vce_v4_0_set_clockgating_state(void *handle,
634 					  enum amd_clockgating_state state)
635 {
636 	/* needed for driver unload */
637 	return 0;
638 }
639 
640 #if 0
641 static bool vce_v4_0_is_idle(void *handle)
642 {
643 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
644 	u32 mask = 0;
645 
646 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
647 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
648 
649 	return !(RREG32(mmSRBM_STATUS2) & mask);
650 }
651 
652 static int vce_v4_0_wait_for_idle(void *handle)
653 {
654 	unsigned i;
655 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
656 
657 	for (i = 0; i < adev->usec_timeout; i++)
658 		if (vce_v4_0_is_idle(handle))
659 			return 0;
660 
661 	return -ETIMEDOUT;
662 }
663 
664 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
665 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
666 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
667 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
668 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
669 
670 static bool vce_v4_0_check_soft_reset(void *handle)
671 {
672 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
673 	u32 srbm_soft_reset = 0;
674 
675 	/* According to the VCE team, we should use VCE_STATUS instead of
676 	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
677 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
678 	 * instance's registers are accessed
679 	 * (0 for the 1st instance, 0x10 for the 2nd instance).
680 	 *
681 	 * VCE_STATUS
682 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
683 	 * |----+----+-----------+----+----+----+----------+---------+----|
684 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
685 	 *
686 	 * The VCE team suggests using bits 3-6 for the busy status check.
687 	 */
688 	mutex_lock(&adev->grbm_idx_mutex);
689 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
690 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
691 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
692 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
693 	}
694 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
695 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
696 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
697 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
698 	}
699 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
700 	mutex_unlock(&adev->grbm_idx_mutex);
701 
702 	if (srbm_soft_reset) {
703 		adev->vce.srbm_soft_reset = srbm_soft_reset;
704 		return true;
705 	} else {
706 		adev->vce.srbm_soft_reset = 0;
707 		return false;
708 	}
709 }
710 
711 static int vce_v4_0_soft_reset(void *handle)
712 {
713 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
714 	u32 srbm_soft_reset;
715 
716 	if (!adev->vce.srbm_soft_reset)
717 		return 0;
718 	srbm_soft_reset = adev->vce.srbm_soft_reset;
719 
720 	if (srbm_soft_reset) {
721 		u32 tmp;
722 
723 		tmp = RREG32(mmSRBM_SOFT_RESET);
724 		tmp |= srbm_soft_reset;
725 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
726 		WREG32(mmSRBM_SOFT_RESET, tmp);
727 		tmp = RREG32(mmSRBM_SOFT_RESET);
728 
729 		udelay(50);
730 
731 		tmp &= ~srbm_soft_reset;
732 		WREG32(mmSRBM_SOFT_RESET, tmp);
733 		tmp = RREG32(mmSRBM_SOFT_RESET);
734 
735 		/* Wait a little for things to settle down */
736 		udelay(50);
737 	}
738 
739 	return 0;
740 }
741 
742 static int vce_v4_0_pre_soft_reset(void *handle)
743 {
744 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
745 
746 	if (!adev->vce.srbm_soft_reset)
747 		return 0;
748 
749 	mdelay(5);
750 
751 	return vce_v4_0_suspend(adev);
752 }
753 
754 
755 static int vce_v4_0_post_soft_reset(void *handle)
756 {
757 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
758 
759 	if (!adev->vce.srbm_soft_reset)
760 		return 0;
761 
762 	mdelay(5);
763 
764 	return vce_v4_0_resume(adev);
765 }
766 
767 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
768 {
769 	u32 tmp, data;
770 
771 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
772 	if (override)
773 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
774 	else
775 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
776 
777 	if (tmp != data)
778 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
779 }
780 
781 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
782 					     bool gated)
783 {
784 	u32 data;
785 
786 	/* Set Override to disable Clock Gating */
787 	vce_v4_0_override_vce_clock_gating(adev, true);
788 
789 	/* This function enables MGCG which is controlled by firmware.
790 	 * With the clocks in the gated state the core is still
791 	 * accessible but the firmware will throttle the clocks on the
792 	 * fly as necessary.
793 	 */
794 	if (gated) {
795 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
796 		data |= 0x1ff;
797 		data &= ~0xef0000;
798 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
799 
800 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
801 		data |= 0x3ff000;
802 		data &= ~0xffc00000;
803 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
804 
805 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
806 		data |= 0x2;
807 		data &= ~0x00010000;
808 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
809 
810 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
811 		data |= 0x37f;
812 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
813 
814 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
815 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
816 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
817 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
818 			0x8;
819 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
820 	} else {
821 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
822 		data &= ~0x80010;
823 		data |= 0xe70008;
824 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
825 
826 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
827 		data |= 0xffc00000;
828 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
829 
830 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
831 		data |= 0x10000;
832 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
833 
834 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
835 		data &= ~0xffc00000;
836 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
837 
838 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
839 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
840 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
841 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
842 			  0x8);
843 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
844 	}
845 	vce_v4_0_override_vce_clock_gating(adev, false);
846 }
847 
848 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
849 {
850 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
851 
852 	if (enable)
853 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
854 	else
855 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
856 
857 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
858 }
859 
860 static int vce_v4_0_set_clockgating_state(void *handle,
861 					  enum amd_clockgating_state state)
862 {
863 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
864 	bool enable = (state == AMD_CG_STATE_GATE);
865 	int i;
866 
867 	if ((adev->asic_type == CHIP_POLARIS10) ||
868 		(adev->asic_type == CHIP_TONGA) ||
869 		(adev->asic_type == CHIP_FIJI))
870 		vce_v4_0_set_bypass_mode(adev, enable);
871 
872 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
873 		return 0;
874 
875 	mutex_lock(&adev->grbm_idx_mutex);
876 	for (i = 0; i < 2; i++) {
877 		/* Program VCE Instance 0 or 1 if not harvested */
878 		if (adev->vce.harvest_config & (1 << i))
879 			continue;
880 
881 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
882 
883 		if (enable) {
884 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
885 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
886 			data &= ~(0xf | 0xff0);
887 			data |= ((0x0 << 0) | (0x04 << 4));
888 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
889 
890 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
891 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
892 			data &= ~(0xf | 0xff0);
893 			data |= ((0x0 << 0) | (0x04 << 4));
894 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
895 		}
896 
897 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
898 	}
899 
900 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
901 	mutex_unlock(&adev->grbm_idx_mutex);
902 
903 	return 0;
904 }
905 
906 static int vce_v4_0_set_powergating_state(void *handle,
907 					  enum amd_powergating_state state)
908 {
909 	/* This doesn't actually powergate the VCE block.
910 	 * That's done in the dpm code via the SMC.  This
911 	 * just re-inits the block as necessary.  The actual
912 	 * gating still happens in the dpm code.  We should
913 	 * revisit this when there is a cleaner line between
914 	 * the smc and the hw blocks
915 	 */
916 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
917 
918 	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
919 		return 0;
920 
921 	if (state == AMD_PG_STATE_GATE)
922 		/* XXX do we need a vce_v4_0_stop()? */
923 		return 0;
924 	else
925 		return vce_v4_0_start(adev);
926 }
927 #endif
928 
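/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VM id to use
 * @ctx_switch: unused here
 *
 * Write the IB_VM command, VM id, IB address and length to the ring.
 */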
929 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
930 		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
931 {
932 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
933 	amdgpu_ring_write(ring, vm_id);
934 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
935 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
936 	amdgpu_ring_write(ring, ib->length_dw);
937 }
938 
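/**
 * vce_v4_0_ring_emit_fence - add a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: address the fence value is written to
 * @seq: sequence number to write
 * @flags: fence flags (64 bit fences are not supported)
 *
 * Emit a fence write followed by a trap command.
 */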
939 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
940 			u64 seq, unsigned flags)
941 {
942 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
943 
944 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
945 	amdgpu_ring_write(ring, addr);
946 	amdgpu_ring_write(ring, upper_32_bits(addr));
947 	amdgpu_ring_write(ring, seq);
948 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
949 }
950 
951 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
952 {
953 	amdgpu_ring_write(ring, VCE_CMD_END);
954 }
955 
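/**
 * vce_v4_0_emit_vm_flush - flush the VM TLB from the VCE ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id to flush for
 * @pd_addr: page directory address
 *
 * Write the page table base for @vm_id into the ring's VM hub, request
 * a TLB flush on the ring's invalidation engine and wait for the ack.
 */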
956 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
957 			 unsigned int vm_id, uint64_t pd_addr)
958 {
959 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
960 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
961 	unsigned eng = ring->vm_inv_eng;
962 
963 	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
964 	pd_addr |= AMDGPU_PTE_VALID;
965 
966 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
967 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
968 	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
969 
970 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
971 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
972 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
973 
974 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
975 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
976 	amdgpu_ring_write(ring, 0xffffffff);
977 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
978 
979 	/* flush TLB */
980 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
981 	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
982 	amdgpu_ring_write(ring, req);
983 
984 	/* wait for flush */
985 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
986 	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
987 	amdgpu_ring_write(ring, 1 << vm_id);
988 	amdgpu_ring_write(ring, 1 << vm_id);
989 }
990 
991 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
992 					struct amdgpu_irq_src *source,
993 					unsigned type,
994 					enum amdgpu_interrupt_state state)
995 {
996 	uint32_t val = 0;
997 
998 	if (!amdgpu_sriov_vf(adev)) {
999 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1000 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1001 
1002 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1003 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1004 	}
1005 	return 0;
1006 }
1007 
1008 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1009 				      struct amdgpu_irq_src *source,
1010 				      struct amdgpu_iv_entry *entry)
1011 {
1012 	DRM_DEBUG("IH: VCE\n");
1013 
1014 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
1015 			VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
1016 			~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
1017 
1018 	switch (entry->src_data[0]) {
1019 	case 0:
1020 	case 1:
1021 	case 2:
1022 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1023 		break;
1024 	default:
1025 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1026 			  entry->src_id, entry->src_data[0]);
1027 		break;
1028 	}
1029 
1030 	return 0;
1031 }
1032 
1033 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1034 	.name = "vce_v4_0",
1035 	.early_init = vce_v4_0_early_init,
1036 	.late_init = NULL,
1037 	.sw_init = vce_v4_0_sw_init,
1038 	.sw_fini = vce_v4_0_sw_fini,
1039 	.hw_init = vce_v4_0_hw_init,
1040 	.hw_fini = vce_v4_0_hw_fini,
1041 	.suspend = vce_v4_0_suspend,
1042 	.resume = vce_v4_0_resume,
1043 	.is_idle = NULL /* vce_v4_0_is_idle */,
1044 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1045 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1046 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1047 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1048 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1049 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1050 	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1051 };
1052 
1053 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1054 	.type = AMDGPU_RING_TYPE_VCE,
1055 	.align_mask = 0x3f,
1056 	.nop = VCE_CMD_NO_OP,
1057 	.support_64bit_ptrs = false,
1058 	.vmhub = AMDGPU_MMHUB,
1059 	.get_rptr = vce_v4_0_ring_get_rptr,
1060 	.get_wptr = vce_v4_0_ring_get_wptr,
1061 	.set_wptr = vce_v4_0_ring_set_wptr,
1062 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1063 	.emit_frame_size =
1064 		17 + /* vce_v4_0_emit_vm_flush */
1065 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1066 		1, /* vce_v4_0_ring_insert_end */
1067 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1068 	.emit_ib = vce_v4_0_ring_emit_ib,
1069 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1070 	.emit_fence = vce_v4_0_ring_emit_fence,
1071 	.test_ring = amdgpu_vce_ring_test_ring,
1072 	.test_ib = amdgpu_vce_ring_test_ib,
1073 	.insert_nop = amdgpu_ring_insert_nop,
1074 	.insert_end = vce_v4_0_ring_insert_end,
1075 	.pad_ib = amdgpu_ring_generic_pad_ib,
1076 	.begin_use = amdgpu_vce_ring_begin_use,
1077 	.end_use = amdgpu_vce_ring_end_use,
1078 };
1079 
1080 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1081 {
1082 	int i;
1083 
1084 	for (i = 0; i < adev->vce.num_rings; i++)
1085 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1086 	DRM_INFO("VCE enabled in VM mode\n");
1087 }
1088 
1089 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1090 	.set = vce_v4_0_set_interrupt_state,
1091 	.process = vce_v4_0_process_interrupt,
1092 };
1093 
1094 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1095 {
1096 	adev->vce.irq.num_types = 1;
1097 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1098 }
1099 
1100 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1101 {
1102 	.type = AMD_IP_BLOCK_TYPE_VCE,
1103 	.major = 4,
1104 	.minor = 0,
1105 	.rev = 0,
1106 	.funcs = &vce_v4_0_ip_funcs,
1107 };
1108