/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"

#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

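/* Select a single VCE instance (and all of its pipes) via GRBM_GFX_INDEX */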
#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
					| GRBM_GFX_INDEX__VCE_ALL_PIPE)

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_RPTR);
	else if (ring == &adev->vce.ring[1])
		return RREG32(mmVCE_RB_RPTR2);
	else
		return RREG32(mmVCE_RB_RPTR3);
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_WPTR);
	else if (ring == &adev->vce.ring[1])
		return RREG32(mmVCE_RB_WPTR2);
	else
		return RREG32(mmVCE_RB_WPTR3);
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		WREG32(mmVCE_RB_WPTR, ring->wptr);
	else if (ring == &adev->vce.ring[1])
		WREG32(mmVCE_RB_WPTR2, ring->wptr);
	else
		WREG32(mmVCE_RB_WPTR3, ring->wptr);
}

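/* Toggle the CGTT override; while the override is set, VCE clock gating is
 * disabled so the gating registers can be programmed directly.
 */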
static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (!gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

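/**
 * vce_v3_0_firmware_loaded - wait for the VCPU to report loaded firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the firmware-loaded bit, resetting the ECPU between
 * retries.  Returns 0 on success, -ETIMEDOUT if the firmware never came up.
 */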
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
		mdelay(10);
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, ring->wptr);
	WREG32(mmVCE_RB_WPTR, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, ring->wptr);
	WREG32(mmVCE_RB_WPTR2, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	ring = &adev->vce.ring[2];
	WREG32(mmVCE_RB_RPTR3, ring->wptr);
	WREG32(mmVCE_RB_WPTR3, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
		vce_v3_0_mc_resume(adev, idx);
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

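/**
 * vce_v3_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Gate the VCPU clock, hold the ECPU in reset and clear the busy flag for
 * every VCE instance that is not harvested.
 */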
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

		/* hold the ECPU in reset */
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		/* Set Clock-Gating off */
		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
			vce_v3_0_set_vce_sw_clock_gating(adev, false);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

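/**
 * vce_v3_0_get_harvest_config - determine which VCE instances are harvested
 *
 * @adev: amdgpu_device pointer
 *
 * Single pipe parts always report VCE1 as harvested; for the rest the
 * harvest fuses are read from the SMC.
 */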
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	/* Fiji, Stoney, Polaris10, Polaris11, Polaris12 are single pipe */
	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY) ||
	    (adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		return AMDGPU_VCE_HARVEST_VCE1;

	/* Tonga and CZ are dual or single pipe */
	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		return 0;
	}
}

static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	adev->vce.num_rings = 3;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r, i;

	/* VCE */
	r = amdgpu_irq_add_id(adev, 167, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	/* 52.8.3 required for 3 ring support */
	if (adev->vce.fw_version < FW_52_8_3)
		adev->vce.num_rings = 2;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vce_sw_fini(adev);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	vce_v3_0_override_vce_clock_gating(adev, true);
	if (!(adev->flags & AMD_IS_APU))
		amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	return vce_v3_0_stop(adev);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	r = vce_v3_0_hw_init(adev);
	if (r)
		return r;

	return r;
}

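/**
 * vce_v3_0_mc_resume - program the VCE memory controller
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance to program
 *
 * Set up the LMI and the VCPU cache windows (firmware, stack and data)
 * for the selected VCE instance.
 */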
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

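/* Idle when no unharvested VCE instance reports busy in SRBM_STATUS2 */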
static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

	switch (entry->src_data) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data);
		break;
	}

	return 0;
}

static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = 0;

	if (state == AMD_PG_STATE_GATE) {
		ret = vce_v3_0_stop(adev);
		if (ret)
			goto out;
	} else {
		ret = vce_v3_0_start(adev);
		if (ret)
			goto out;
	}

out:
	return ret;
}

static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	mutex_lock(&adev->pm.mutex);

	if (RREG32_SMC(ixCURRENT_PG_STATUS) &
			CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
		goto out;
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);

	/* AMD_CG_SUPPORT_VCE_MGCG */
	data = RREG32(mmVCE_CLOCK_GATING_A);
	if (data & (0x04 << 4))
		*flags |= AMD_CG_SUPPORT_VCE_MGCG;

out:
	mutex_unlock(&adev->pm.mutex);
}

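/* Emit a VCE_CMD_IB_VM packet that points the engine at the indirect buffer */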
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

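/* Update the page table base for vm_id and flush the VM TLB on the ring */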
static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, pd_addr >> 12);

	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

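/* Make the ring wait until the last emitted fence sequence has signaled */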
static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
	.get_clockgating_state = vce_v3_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size =
		4 + /* vce_v3_0_emit_pipeline_sync */
		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		6 + /* vce_v3_0_emit_vm_flush */
		4 + /* vce_v3_0_emit_pipeline_sync */
		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = vce_v3_0_ring_emit_ib,
	.emit_vm_flush = vce_v3_0_emit_vm_flush,
	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	if (adev->asic_type >= CHIP_STONEY) {
		for (i = 0; i < adev->vce.num_rings; i++)
			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
		DRM_INFO("VCE enabled in VM mode\n");
	} else {
		for (i = 0; i < adev->vce.num_rings; i++)
			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
		DRM_INFO("VCE enabled in physical mode\n");
	}
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 1,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 4,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};