/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

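/* sizes of the firmware, stack and data regions programmed into the VCPU cache */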
#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring->me == 0)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

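	/* poll VCE_STATUS for the firmware-loaded flag; if it never shows up, reset the ECPU and retry */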
	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

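	/* reset ring 0 doorbell and write-pointer bookkeeping before kicking off the MMSCH */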
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

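	/* poll the mailbox response (up to ~10 ms) for the completion value 0x10000002 */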
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

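		/* describe ring 0 buffer location and size via direct register writes in the init table */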
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* begin of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					offset & ~0x0f000000);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

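	/* program the ring buffer registers for all three rings before bringing up the VCPU */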
	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE ring 0 supports SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

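	/* with PSP loading the firmware image is not kept in this BO, so only reserve stack and data space */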
	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* only the first encode ring is used under SRIOV,
			 * so point the unused rings at a spare doorbell location.
			 */
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_vce_entity_init(adev);
	if (r)
		return r;

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

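	/* with PSP firmware loading, save the VCPU BO contents so resume can restore them */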
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}

static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

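	/* map the firmware, stack and data regions into VCPU cache regions 0, 1 and 2 */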
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * SRBM_STATUS.VCE_BUSY for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 0x10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
#endif

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

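	/* skip programming VCE_SYS_INT_EN under SRIOV; the VF is not supposed to touch VCE registers */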
	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

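	/* src_data[0] identifies which of the three encode rings raised the trap */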
	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
		adev->vce.ring[i].me = i;
	}
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};