xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision 080e613c)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drm_drv.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "soc15_common.h"
35 #include "mmsch_v1_0.h"
36 
37 #include "vce/vce_4_0_offset.h"
38 #include "vce/vce_4_0_default.h"
39 #include "vce/vce_4_0_sh_mask.h"
40 #include "mmhub/mmhub_1_0_offset.h"
41 #include "mmhub/mmhub_1_0_sh_mask.h"
42 
43 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
44 
45 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
46 
47 #define VCE_V4_0_FW_SIZE	(384 * 1024)
48 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
49 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
50 
51 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
52 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
53 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54 
55 /**
56  * vce_v4_0_ring_get_rptr - get read pointer
57  *
58  * @ring: amdgpu_ring pointer
59  *
60  * Returns the current hardware read pointer
61  */
62 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
63 {
64 	struct amdgpu_device *adev = ring->adev;
65 
66 	if (ring->me == 0)
67 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
68 	else if (ring->me == 1)
69 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
70 	else
71 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
72 }
73 
74 /**
75  * vce_v4_0_ring_get_wptr - get write pointer
76  *
77  * @ring: amdgpu_ring pointer
78  *
79  * Returns the current hardware write pointer
80  */
81 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
82 {
83 	struct amdgpu_device *adev = ring->adev;
84 
85 	if (ring->use_doorbell)
86 		return adev->wb.wb[ring->wptr_offs];
87 
88 	if (ring->me == 0)
89 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
90 	else if (ring->me == 1)
91 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
92 	else
93 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
94 }
95 
96 /**
97  * vce_v4_0_ring_set_wptr - set write pointer
98  *
99  * @ring: amdgpu_ring pointer
100  *
101  * Commits the write pointer to the hardware
102  */
103 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
104 {
105 	struct amdgpu_device *adev = ring->adev;
106 
107 	if (ring->use_doorbell) {
108 		/* XXX check if swapping is necessary on BE */
109 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
110 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
111 		return;
112 	}
113 
114 	if (ring->me == 0)
115 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
116 			lower_32_bits(ring->wptr));
117 	else if (ring->me == 1)
118 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
119 			lower_32_bits(ring->wptr));
120 	else
121 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
122 			lower_32_bits(ring->wptr));
123 }
124 
/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to report in
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the VCPU reports that the firmware has loaded.
 * If the firmware does not respond within one polling window, the ECPU
 * is soft-reset and polling restarts, up to 10 attempts.
 *
 * Returns 0 when the firmware reports loaded, -ETIMEDOUT otherwise.
 */
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	/* up to 10 reset attempts, each with 100 * 10ms of status polling */
	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		/* no response in this window: pulse the ECPU soft reset and retry */
		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}
152 
/**
 * vce_v4_0_mmsch_start - hand the init table to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: MM table holding the previously-built init descriptor
 *
 * Programs the MMSCH (MM scheduler, used under SRIOV) with the GPU
 * address and size of the init descriptor, kicks off initialization via
 * the mailbox, and polls the mailbox response for completion.
 *
 * Returns 0 on success, -EBUSY if the MMSCH never acknowledges.
 */
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	/* total descriptor size in dwords: header plus both IP tables */
	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* reset ring 0 software state before the MMSCH reinitializes it */
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
204 
/**
 * vce_v4_0_sriov_start - start the VCE block under SRIOV
 *
 * @adev: amdgpu_device pointer
 *
 * Under SRIOV the guest must not program VCE registers directly, so every
 * register access of the normal start/mc_resume path is instead encoded as
 * an entry of an MMSCH init table.  The table is built once (when the
 * header still shows an empty VCE section) and then submitted to the MMSCH
 * via vce_v4_0_mmsch_start().
 *
 * Returns 0 on success or a negative error code from the MMSCH handshake.
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* build the VCE table only once */
	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* place the VCE table right after the header, or after the UVD table if present */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* ring 0 is the only VCE ring used under SRIOV */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			/* PSP-loaded firmware lives in the TMR region */
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		/* stack and data caches always come from the driver BO */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* with PSP loading, the fw is not in the BO, so the stack starts at 0 */
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* have the MMSCH poll until the firmware reports loaded */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}
328 
/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block: program all three ring buffers, restore
 * the memory controller setup, then release the ECPU from reset and wait
 * for the firmware to report loaded.
 *
 * Returns 0 on success, the error from vce_v4_0_firmware_loaded() otherwise.
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	/* mark the engine busy while bringing up the firmware */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	/* enable the VCPU clock */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	/* release the ECPU from soft reset and let the firmware boot */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}
387 
/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU, holds the ECPU in reset and clears the status
 * register.  Always returns 0.
 */
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}
409 
410 static int vce_v4_0_early_init(void *handle)
411 {
412 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
413 
414 	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
415 		adev->vce.num_rings = 1;
416 	else
417 		adev->vce.num_rings = 3;
418 
419 	vce_v4_0_set_ring_funcs(adev);
420 	vce_v4_0_set_irq_funcs(adev);
421 
422 	return 0;
423 }
424 
425 static int vce_v4_0_sw_init(void *handle)
426 {
427 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
428 	struct amdgpu_ring *ring;
429 
430 	unsigned size;
431 	int r, i;
432 
433 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
434 	if (r)
435 		return r;
436 
437 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
438 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
439 		size += VCE_V4_0_FW_SIZE;
440 
441 	r = amdgpu_vce_sw_init(adev, size);
442 	if (r)
443 		return r;
444 
445 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
446 		const struct common_firmware_header *hdr;
447 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
448 
449 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
450 		if (!adev->vce.saved_bo)
451 			return -ENOMEM;
452 
453 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
454 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
455 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
456 		adev->firmware.fw_size +=
457 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
458 		DRM_INFO("PSP loading VCE firmware\n");
459 	} else {
460 		r = amdgpu_vce_resume(adev);
461 		if (r)
462 			return r;
463 	}
464 
465 	for (i = 0; i < adev->vce.num_rings; i++) {
466 		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
467 
468 		ring = &adev->vce.ring[i];
469 		sprintf(ring->name, "vce%d", i);
470 		if (amdgpu_sriov_vf(adev)) {
471 			/* DOORBELL only works under SRIOV */
472 			ring->use_doorbell = true;
473 
474 			/* currently only use the first encoding ring for sriov,
475 			 * so set unused location for other unused rings.
476 			 */
477 			if (i == 0)
478 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
479 			else
480 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
481 		}
482 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
483 				     hw_prio, NULL);
484 		if (r)
485 			return r;
486 	}
487 
488 
489 	r = amdgpu_vce_entity_init(adev);
490 	if (r)
491 		return r;
492 
493 	r = amdgpu_virt_alloc_mm_table(adev);
494 	if (r)
495 		return r;
496 
497 	return r;
498 }
499 
/**
 * vce_v4_0_sw_fini - software teardown
 *
 * @handle: amdgpu_device pointer
 *
 * Frees the SRIOV MM table, the PSP-load backup buffer, suspends the VCE
 * block and releases the common VCE software state.
 *
 * Returns 0 on success or the error from amdgpu_vce_suspend()/sw_fini().
 */
static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}
519 
/**
 * vce_v4_0_hw_init - hardware init
 *
 * @handle: amdgpu_device pointer
 *
 * Starts the VCE block (via the MMSCH under SRIOV, directly otherwise)
 * and runs a ring test on every active ring.
 *
 * Returns 0 on success or a negative error code.
 */
static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}
542 
/**
 * vce_v4_0_hw_fini - hardware teardown
 *
 * @handle: amdgpu_device pointer
 *
 * Cancels pending idle work, gates power/clocks (or disables DPM) and,
 * on bare metal, halts the engine.  Under SRIOV no VCE register may be
 * touched by the guest, so the hardware is left alone.
 *
 * Always returns 0.
 */
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/*
	 * Proper cleanups before halting the HW engine:
	 *   - cancel the delayed idle work
	 *   - enable powergating
	 *   - enable clockgating
	 *   - disable dpm
	 *
	 * TODO: to align with the VCN implementation, move the
	 * jobs for clockgating/powergating/dpm setting to
	 * ->set_powergating_state().
	 */
	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->pm.dpm_enabled) {
		amdgpu_dpm_enable_vce(adev, false);
	} else {
		amdgpu_asic_set_vce_clocks(adev, 0, 0);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_PG_STATE_GATE);
		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_CG_STATE_GATE);
	}

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	return 0;
}
580 
/**
 * vce_v4_0_suspend - suspend the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * With PSP-loaded firmware, saves the VCPU BO contents into the backup
 * buffer (only while the device is still alive, guarded by
 * drm_dev_enter), then tears down the hardware and the common VCE state.
 *
 * Returns 0 on success or a negative error code.
 */
static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (drm_dev_enter(&adev->ddev, &idx)) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			/* snapshot the VCPU BO so resume can restore it */
			memcpy_fromio(adev->vce.saved_bo, ptr, size);
		}
		drm_dev_exit(idx);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}
605 
/**
 * vce_v4_0_resume - resume the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Restores the VCPU BO contents from the backup buffer when the firmware
 * is PSP-loaded (otherwise re-runs the common resume path), then performs
 * hardware init.
 *
 * Returns 0 on success or a negative error code.
 */
static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {

		if (drm_dev_enter(&adev->ddev, &idx)) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			/* restore the snapshot taken in vce_v4_0_suspend() */
			memcpy_toio(ptr, adev->vce.saved_bo, size);
			drm_dev_exit(idx);
		}
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}
631 
/**
 * vce_v4_0_mc_resume - program VCE memory controller / VCPU caches
 *
 * @adev: amdgpu_device pointer
 *
 * Configures clock gating defaults, the LMI, and the three VCPU cache
 * windows: cache 0 maps the firmware (from the PSP TMR region when the
 * PSP loads it, otherwise from the driver BO), cache 1 the stack and
 * cache 2 the data area, then enables the trap interrupt.
 */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		/* PSP-loaded firmware lives in the TMR region */
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	/* cache 1: stack */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	/* cache 2: data */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
688 
/*
 * Stub clockgating handler: no-op, but must exist so the IP framework can
 * invoke it during driver unload.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload*/
	return 0;
}
695 
696 #if 0
697 static bool vce_v4_0_is_idle(void *handle)
698 {
699 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
700 	u32 mask = 0;
701 
702 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
703 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
704 
705 	return !(RREG32(mmSRBM_STATUS2) & mask);
706 }
707 
708 static int vce_v4_0_wait_for_idle(void *handle)
709 {
710 	unsigned i;
711 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
712 
713 	for (i = 0; i < adev->usec_timeout; i++)
714 		if (vce_v4_0_is_idle(handle))
715 			return 0;
716 
717 	return -ETIMEDOUT;
718 }
719 
720 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
721 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
722 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
723 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
724 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
725 
726 static bool vce_v4_0_check_soft_reset(void *handle)
727 {
728 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
729 	u32 srbm_soft_reset = 0;
730 
731 	/* According to VCE team , we should use VCE_STATUS instead
732 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
733 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
734 	 * instance's registers are accessed
735 	 * (0 for 1st instance, 10 for 2nd instance).
736 	 *
737 	 *VCE_STATUS
738 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
739 	 *|----+----+-----------+----+----+----+----------+---------+----|
740 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
741 	 *
742 	 * VCE team suggest use bit 3--bit 6 for busy status check
743 	 */
744 	mutex_lock(&adev->grbm_idx_mutex);
745 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
746 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
747 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
748 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
749 	}
750 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
751 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
752 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
753 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
754 	}
755 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
756 	mutex_unlock(&adev->grbm_idx_mutex);
757 
758 	if (srbm_soft_reset) {
759 		adev->vce.srbm_soft_reset = srbm_soft_reset;
760 		return true;
761 	} else {
762 		adev->vce.srbm_soft_reset = 0;
763 		return false;
764 	}
765 }
766 
/**
 * vce_v4_0_soft_reset - perform the recorded SRBM soft reset
 *
 * @handle: amdgpu_device pointer
 *
 * Pulses the SRBM_SOFT_RESET bits recorded by
 * vce_v4_0_check_soft_reset(): asserts them, waits, deasserts them and
 * waits again for the block to settle.
 *
 * Always returns 0.
 */
static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}
797 
798 static int vce_v4_0_pre_soft_reset(void *handle)
799 {
800 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
801 
802 	if (!adev->vce.srbm_soft_reset)
803 		return 0;
804 
805 	mdelay(5);
806 
807 	return vce_v4_0_suspend(adev);
808 }
809 
810 
811 static int vce_v4_0_post_soft_reset(void *handle)
812 {
813 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
814 
815 	if (!adev->vce.srbm_soft_reset)
816 		return 0;
817 
818 	mdelay(5);
819 
820 	return vce_v4_0_resume(adev);
821 }
822 
823 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
824 {
825 	u32 tmp, data;
826 
827 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
828 	if (override)
829 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
830 	else
831 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
832 
833 	if (tmp != data)
834 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
835 }
836 
/**
 * vce_v4_0_set_vce_sw_clock_gating - program firmware-managed MGCG
 *
 * @adev: amdgpu_device pointer
 * @gated: true to put the clocks in the gated state, false otherwise
 *
 * Toggles the CGTT override around a series of clock-gating register
 * updates.  Mask values are hardware-defined bit patterns for the gated
 * and ungated configurations.
 */
static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}
903 
904 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
905 {
906 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
907 
908 	if (enable)
909 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
910 	else
911 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
912 
913 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
914 }
915 
916 static int vce_v4_0_set_clockgating_state(void *handle,
917 					  enum amd_clockgating_state state)
918 {
919 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
920 	bool enable = (state == AMD_CG_STATE_GATE);
921 	int i;
922 
923 	if ((adev->asic_type == CHIP_POLARIS10) ||
924 		(adev->asic_type == CHIP_TONGA) ||
925 		(adev->asic_type == CHIP_FIJI))
926 		vce_v4_0_set_bypass_mode(adev, enable);
927 
928 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
929 		return 0;
930 
931 	mutex_lock(&adev->grbm_idx_mutex);
932 	for (i = 0; i < 2; i++) {
933 		/* Program VCE Instance 0 or 1 if not harvested */
934 		if (adev->vce.harvest_config & (1 << i))
935 			continue;
936 
937 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
938 
939 		if (enable) {
940 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
941 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
942 			data &= ~(0xf | 0xff0);
943 			data |= ((0x0 << 0) | (0x04 << 4));
944 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
945 
946 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
947 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
948 			data &= ~(0xf | 0xff0);
949 			data |= ((0x0 << 0) | (0x04 << 4));
950 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
951 		}
952 
953 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
954 	}
955 
956 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
957 	mutex_unlock(&adev->grbm_idx_mutex);
958 
959 	return 0;
960 }
961 #endif
962 
963 static int vce_v4_0_set_powergating_state(void *handle,
964 					  enum amd_powergating_state state)
965 {
966 	/* This doesn't actually powergate the VCE block.
967 	 * That's done in the dpm code via the SMC.  This
968 	 * just re-inits the block as necessary.  The actual
969 	 * gating still happens in the dpm code.  We should
970 	 * revisit this when there is a cleaner line between
971 	 * the smc and the hw blocks
972 	 */
973 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
974 
975 	if (state == AMD_PG_STATE_GATE)
976 		return vce_v4_0_stop(adev);
977 	else
978 		return vce_v4_0_start(adev);
979 }
980 
/* Emit an indirect-buffer execution packet on the VCE ring.
 * Packet layout (one dword each): VCE_CMD_IB_VM, vmid, IB GPU address
 * (low dword, high dword), IB length in dwords.  Word order is the
 * hardware packet format and must not change.
 */
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}
992 
/* Emit a fence packet followed by a trap on the VCE ring.
 * Packet: VCE_CMD_FENCE, fence address (low, high dwords), sequence
 * value, then VCE_CMD_TRAP to raise the completion interrupt.
 * 64-bit fence sequences are not supported (seq is emitted as a single
 * dword), hence the WARN_ON for AMDGPU_FENCE_FLAG_64BIT.
 */
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}
1004 
/* Terminate the current command stream with a VCE_CMD_END packet. */
static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}
1009 
/* Emit a register-wait packet: the engine polls until (reg & mask) == val.
 * @reg is a dword register offset; the packet takes a byte offset, hence
 * the << 2.  Note the packet order is mask before val.
 */
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring,	reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}
1018 
/* Emit a VM TLB flush for @vmid on this ring's VM hub, then wait until
 * the hub's ctx0 page-table-base register for that vmid reflects the
 * (possibly translated) page-directory address returned by the GMC
 * helper, confirming the register writes have landed.
 */
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
			       vmid * hub->ctx_addr_distance,
			       lower_32_bits(pd_addr), 0xffffffff);
}
1031 
/* Emit a register-write packet: the engine writes @val to @reg.
 * @reg is a dword register offset; the packet takes a byte offset,
 * hence the << 2.
 */
static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring,	reg << 2);
	amdgpu_ring_write(ring, val);
}
1039 
1040 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1041 					struct amdgpu_irq_src *source,
1042 					unsigned type,
1043 					enum amdgpu_interrupt_state state)
1044 {
1045 	uint32_t val = 0;
1046 
1047 	if (!amdgpu_sriov_vf(adev)) {
1048 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1049 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1050 
1051 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1052 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1053 	}
1054 	return 0;
1055 }
1056 
1057 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1058 				      struct amdgpu_irq_src *source,
1059 				      struct amdgpu_iv_entry *entry)
1060 {
1061 	DRM_DEBUG("IH: VCE\n");
1062 
1063 	switch (entry->src_data[0]) {
1064 	case 0:
1065 	case 1:
1066 	case 2:
1067 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1068 		break;
1069 	default:
1070 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1071 			  entry->src_id, entry->src_data[0]);
1072 		break;
1073 	}
1074 
1075 	return 0;
1076 }
1077 
/* IP-level callbacks for the VCE 4.0 block.  Idle/soft-reset handling is
 * intentionally unimplemented (hooks left NULL, original names kept in
 * the comments for reference).
 */
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};
1097 
/* Ring callbacks for VCE 4.0 operating in VM mode.  emit_frame_size and
 * emit_ib_size are dword budgets that must match the corresponding
 * emit functions' packet sizes (see per-entry comments below).
 */
static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
1130 
1131 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1132 {
1133 	int i;
1134 
1135 	for (i = 0; i < adev->vce.num_rings; i++) {
1136 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1137 		adev->vce.ring[i].me = i;
1138 	}
1139 	DRM_INFO("VCE enabled in VM mode\n");
1140 }
1141 
/* IRQ source callbacks: state programming and interrupt dispatch. */
static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};
1146 
1147 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1148 {
1149 	adev->vce.irq.num_types = 1;
1150 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1151 };
1152 
/* Version descriptor registered with the amdgpu IP-block framework:
 * identifies this driver as VCE v4.0.0 and points at its callbacks.
 */
const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};
1161