xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c (revision 50237287)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  * Authors: Christian König <christian.koenig@amd.com>
26  */
27 
28 #include <linux/firmware.h>
29 #include <drm/drmP.h>
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "vid.h"
33 #include "vce/vce_3_0_d.h"
34 #include "vce/vce_3_0_sh_mask.h"
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 #include "gca/gfx_8_0_d.h"
38 #include "smu/smu_7_1_2_d.h"
39 #include "smu/smu_7_1_2_sh_mask.h"
41 #include "gca/gfx_8_0_sh_mask.h"
42 
43 
44 #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
45 #define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
46 #define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07
47 
48 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
49 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
50 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
51 #define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000
52 
53 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
54 
55 #define VCE_V3_0_FW_SIZE	(384 * 1024)
56 #define VCE_V3_0_STACK_SIZE	(64 * 1024)
57 #define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
58 
59 #define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))
60 
61 #define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
62 					| GRBM_GFX_INDEX__VCE_ALL_PIPE)
63 
64 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
65 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
66 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
67 static int vce_v3_0_wait_for_idle(void *handle);
68 static int vce_v3_0_set_clockgating_state(void *handle,
69 					  enum amd_clockgating_state state);
70 /**
71  * vce_v3_0_ring_get_rptr - get read pointer
72  *
73  * @ring: amdgpu_ring pointer
74  *
75  * Returns the current hardware read pointer
76  */
77 static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
78 {
79 	struct amdgpu_device *adev = ring->adev;
80 
81 	if (ring == &adev->vce.ring[0])
82 		return RREG32(mmVCE_RB_RPTR);
83 	else if (ring == &adev->vce.ring[1])
84 		return RREG32(mmVCE_RB_RPTR2);
85 	else
86 		return RREG32(mmVCE_RB_RPTR3);
87 }
88 
89 /**
90  * vce_v3_0_ring_get_wptr - get write pointer
91  *
92  * @ring: amdgpu_ring pointer
93  *
94  * Returns the current hardware write pointer
95  */
96 static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
97 {
98 	struct amdgpu_device *adev = ring->adev;
99 
100 	if (ring == &adev->vce.ring[0])
101 		return RREG32(mmVCE_RB_WPTR);
102 	else if (ring == &adev->vce.ring[1])
103 		return RREG32(mmVCE_RB_WPTR2);
104 	else
105 		return RREG32(mmVCE_RB_WPTR3);
106 }
107 
108 /**
109  * vce_v3_0_ring_set_wptr - set write pointer
110  *
111  * @ring: amdgpu_ring pointer
112  *
113  * Commits the write pointer to the hardware
114  */
115 static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
116 {
117 	struct amdgpu_device *adev = ring->adev;
118 
119 	if (ring == &adev->vce.ring[0])
120 		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
121 	else if (ring == &adev->vce.ring[1])
122 		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
123 	else
124 		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
125 }
126 
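/**
 * vce_v3_0_override_vce_clock_gating - toggle the CGTT override
 *
 * @adev: amdgpu_device pointer
 * @override: true to override (disable) clock gating, false to re-enable it
 *
 * Sets or clears the VCE_CGTT_OVERRIDE field in VCE_RB_ARB_CTRL.
 */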
127 static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
128 {
129 	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
130 }
131 
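/**
 * vce_v3_0_set_vce_sw_clock_gating - program the VCE clock gating registers
 *
 * @adev: amdgpu_device pointer
 * @gated: true to program the gated (clocks throttled) values
 *
 * Temporarily sets the clock gating override, programs the core and UENC
 * clock gating registers for the requested state, then clears the override.
 */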
132 static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
133 					     bool gated)
134 {
135 	u32 data;
136 
137 	/* Set Override to disable Clock Gating */
138 	vce_v3_0_override_vce_clock_gating(adev, true);
139 
140 	/* This function enables MGCG which is controlled by firmware.
141 	 * With the clocks in the gated state the core is still
142 	 * accessible but the firmware will throttle the clocks on the
143 	 * fly as necessary.
144 	 */
145 	if (!gated) {
146 		data = RREG32(mmVCE_CLOCK_GATING_B);
147 		data |= 0x1ff;
148 		data &= ~0xef0000;
149 		WREG32(mmVCE_CLOCK_GATING_B, data);
150 
151 		data = RREG32(mmVCE_UENC_CLOCK_GATING);
152 		data |= 0x3ff000;
153 		data &= ~0xffc00000;
154 		WREG32(mmVCE_UENC_CLOCK_GATING, data);
155 
156 		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
157 		data |= 0x2;
158 		data &= ~0x00010000;
159 		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
160 
161 		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
162 		data |= 0x37f;
163 		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
164 
165 		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
166 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
167 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
168 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
169 			0x8;
170 		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
171 	} else {
172 		data = RREG32(mmVCE_CLOCK_GATING_B);
173 		data &= ~0x80010;
174 		data |= 0xe70008;
175 		WREG32(mmVCE_CLOCK_GATING_B, data);
176 
177 		data = RREG32(mmVCE_UENC_CLOCK_GATING);
178 		data |= 0xffc00000;
179 		WREG32(mmVCE_UENC_CLOCK_GATING, data);
180 
181 		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
182 		data |= 0x10000;
183 		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
184 
185 		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
186 		data &= ~0x3ff;
187 		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
188 
189 		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
190 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
191 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
192 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
193 			  0x8);
194 		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
195 	}
196 	vce_v3_0_override_vce_clock_gating(adev, false);
197 }
198 
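/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to report ready
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the firmware loaded bit, resetting the ECPU a few
 * times if it does not show up.  Returns 0 on success or -ETIMEDOUT if the
 * firmware never reports in.
 */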
199 static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
200 {
201 	int i, j;
202 
203 	for (i = 0; i < 10; ++i) {
204 		for (j = 0; j < 100; ++j) {
205 			uint32_t status = RREG32(mmVCE_STATUS);
206 
207 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
208 				return 0;
209 			mdelay(10);
210 		}
211 
212 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
213 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
214 		mdelay(10);
215 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
216 		mdelay(10);
217 	}
218 
219 	return -ETIMEDOUT;
220 }
221 
222 /**
223  * vce_v3_0_start - start VCE block
224  *
225  * @adev: amdgpu_device pointer
226  *
227  * Setup and start the VCE block
228  */
229 static int vce_v3_0_start(struct amdgpu_device *adev)
230 {
231 	struct amdgpu_ring *ring;
232 	int idx, r;
233 
234 	ring = &adev->vce.ring[0];
235 	WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
236 	WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
237 	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
238 	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
239 	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
240 
241 	ring = &adev->vce.ring[1];
242 	WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
243 	WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
244 	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
245 	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
246 	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
247 
248 	ring = &adev->vce.ring[2];
249 	WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
250 	WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
251 	WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
252 	WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
253 	WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
254 
255 	mutex_lock(&adev->grbm_idx_mutex);
256 	for (idx = 0; idx < 2; ++idx) {
257 		if (adev->vce.harvest_config & (1 << idx))
258 			continue;
259 
260 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
261 		vce_v3_0_mc_resume(adev, idx);
262 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);
263 
264 		if (adev->asic_type >= CHIP_STONEY)
265 			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
266 		else
267 			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);
268 
269 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
270 		mdelay(100);
271 
272 		r = vce_v3_0_firmware_loaded(adev);
273 
274 		/* clear BUSY flag */
275 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);
276 
277 		if (r) {
278 			DRM_ERROR("VCE not responding, giving up!!!\n");
279 			mutex_unlock(&adev->grbm_idx_mutex);
280 			return r;
281 		}
282 	}
283 
284 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
285 	mutex_unlock(&adev->grbm_idx_mutex);
286 
287 	return 0;
288 }
289 
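/**
 * vce_v3_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU clock, holds the ECPU in reset and clears VCE_STATUS
 * on every non-harvested instance.
 */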
290 static int vce_v3_0_stop(struct amdgpu_device *adev)
291 {
292 	int idx;
293 
294 	mutex_lock(&adev->grbm_idx_mutex);
295 	for (idx = 0; idx < 2; ++idx) {
296 		if (adev->vce.harvest_config & (1 << idx))
297 			continue;
298 
299 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
300 
301 		if (adev->asic_type >= CHIP_STONEY)
302 			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
303 		else
304 			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);
305 
306 		/* hold the ECPU in reset */
307 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
308 
309 		/* clear VCE STATUS */
310 		WREG32(mmVCE_STATUS, 0);
311 	}
312 
313 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
314 	mutex_unlock(&adev->grbm_idx_mutex);
315 
316 	return 0;
317 }
318 
319 #define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
320 #define VCE_HARVEST_FUSE_MACRO__SHIFT       27
321 #define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000
322 
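/**
 * vce_v3_0_get_harvest_config - determine which VCE instances are harvested
 *
 * @adev: amdgpu_device pointer
 *
 * Returns a mask of AMDGPU_VCE_HARVEST_VCE0/VCE1 bits, derived either from
 * the ASIC type or from the harvest fuses.
 */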
323 static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
324 {
325 	u32 tmp;
326 
327 	/* Fiji, Stoney, Polaris10, Polaris11, Polaris12 are single pipe */
328 	if ((adev->asic_type == CHIP_FIJI) ||
329 	    (adev->asic_type == CHIP_STONEY) ||
330 	    (adev->asic_type == CHIP_POLARIS10) ||
331 	    (adev->asic_type == CHIP_POLARIS11) ||
332 	    (adev->asic_type == CHIP_POLARIS12))
333 		return AMDGPU_VCE_HARVEST_VCE1;
334 
335 	/* Tonga and CZ are dual or single pipe */
336 	if (adev->flags & AMD_IS_APU)
337 		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
338 		       VCE_HARVEST_FUSE_MACRO__MASK) >>
339 			VCE_HARVEST_FUSE_MACRO__SHIFT;
340 	else
341 		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
342 		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
343 			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;
344 
345 	switch (tmp) {
346 	case 1:
347 		return AMDGPU_VCE_HARVEST_VCE0;
348 	case 2:
349 		return AMDGPU_VCE_HARVEST_VCE1;
350 	case 3:
351 		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
352 	default:
353 		return 0;
354 	}
355 }
356 
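/**
 * vce_v3_0_early_init - set up harvesting, ring count and callbacks
 *
 * @handle: amdgpu_device pointer
 *
 * Returns -ENOENT if both VCE instances are harvested, 0 otherwise.
 */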
357 static int vce_v3_0_early_init(void *handle)
358 {
359 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
360 
361 	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
362 
363 	if ((adev->vce.harvest_config &
364 	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
365 	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
366 		return -ENOENT;
367 
368 	adev->vce.num_rings = 3;
369 
370 	vce_v3_0_set_ring_funcs(adev);
371 	vce_v3_0_set_irq_funcs(adev);
372 
373 	return 0;
374 }
375 
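/**
 * vce_v3_0_sw_init - software init, allocate firmware BO and rings
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the VCE interrupt source, loads the firmware and initializes
 * the rings (only two rings if the firmware is older than 52.8.3).
 */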
376 static int vce_v3_0_sw_init(void *handle)
377 {
378 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
379 	struct amdgpu_ring *ring;
380 	int r, i;
381 
382 	/* VCE */
383 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq);
384 	if (r)
385 		return r;
386 
387 	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
388 		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
389 	if (r)
390 		return r;
391 
392 	/* 52.8.3 required for 3 ring support */
393 	if (adev->vce.fw_version < FW_52_8_3)
394 		adev->vce.num_rings = 2;
395 
396 	r = amdgpu_vce_resume(adev);
397 	if (r)
398 		return r;
399 
400 	for (i = 0; i < adev->vce.num_rings; i++) {
401 		ring = &adev->vce.ring[i];
402 		sprintf(ring->name, "vce%d", i);
403 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
404 		if (r)
405 			return r;
406 	}
407 
408 	return r;
409 }
410 
411 static int vce_v3_0_sw_fini(void *handle)
412 {
413 	int r;
414 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
415 
416 	r = amdgpu_vce_suspend(adev);
417 	if (r)
418 		return r;
419 
420 	return amdgpu_vce_sw_fini(adev);
421 }
422 
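/**
 * vce_v3_0_hw_init - start the VCE clocks and test the rings
 *
 * @handle: amdgpu_device pointer
 *
 * Overrides clock gating, requests the VCE clocks on dGPUs and runs a ring
 * test on every enabled ring.
 */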
423 static int vce_v3_0_hw_init(void *handle)
424 {
425 	int r, i;
426 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
427 
428 	vce_v3_0_override_vce_clock_gating(adev, true);
429 	if (!(adev->flags & AMD_IS_APU))
430 		amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
431 
432 	for (i = 0; i < adev->vce.num_rings; i++)
433 		adev->vce.ring[i].ready = false;
434 
435 	for (i = 0; i < adev->vce.num_rings; i++) {
436 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
437 		if (r)
438 			return r;
439 		else
440 			adev->vce.ring[i].ready = true;
441 	}
442 
443 	DRM_INFO("VCE initialized successfully.\n");
444 
445 	return 0;
446 }
447 
448 static int vce_v3_0_hw_fini(void *handle)
449 {
450 	int r;
451 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
452 
453 	r = vce_v3_0_wait_for_idle(handle);
454 	if (r)
455 		return r;
456 
457 	vce_v3_0_stop(adev);
458 	return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
459 }
460 
461 static int vce_v3_0_suspend(void *handle)
462 {
463 	int r;
464 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
465 
466 	r = vce_v3_0_hw_fini(adev);
467 	if (r)
468 		return r;
469 
470 	return amdgpu_vce_suspend(adev);
471 }
472 
473 static int vce_v3_0_resume(void *handle)
474 {
475 	int r;
476 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
477 
478 	r = amdgpu_vce_resume(adev);
479 	if (r)
480 		return r;
481 
482 	return vce_v3_0_hw_init(adev);
483 }
484 
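/**
 * vce_v3_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Programs the LMI registers and points the VCPU caches at the firmware,
 * stack and data regions for the given instance.
 */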
485 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
486 {
487 	uint32_t offset, size;
488 
489 	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
490 	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
491 	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
492 	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);
493 
494 	WREG32(mmVCE_LMI_CTRL, 0x00398000);
495 	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
496 	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
497 	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
498 	WREG32(mmVCE_LMI_VM_CTRL, 0);
499 	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);
500 
501 	if (adev->asic_type >= CHIP_STONEY) {
502 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
503 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
504 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
505 	} else
506 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
507 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
508 	size = VCE_V3_0_FW_SIZE;
509 	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
510 	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
511 
512 	if (idx == 0) {
513 		offset += size;
514 		size = VCE_V3_0_STACK_SIZE;
515 		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
516 		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
517 		offset += size;
518 		size = VCE_V3_0_DATA_SIZE;
519 		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
520 		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
521 	} else {
522 		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
523 		size = VCE_V3_0_STACK_SIZE;
524 		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
525 		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
526 		offset += size;
527 		size = VCE_V3_0_DATA_SIZE;
528 		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
529 		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
530 	}
531 
532 	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
533 	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
534 }
535 
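/**
 * vce_v3_0_is_idle - check whether the non-harvested VCE instances are idle
 *
 * @handle: amdgpu_device pointer
 *
 * Returns true if neither VCE busy bit is set in SRBM_STATUS2.
 */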
536 static bool vce_v3_0_is_idle(void *handle)
537 {
538 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
539 	u32 mask = 0;
540 
541 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
542 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
543 
544 	return !(RREG32(mmSRBM_STATUS2) & mask);
545 }
546 
547 static int vce_v3_0_wait_for_idle(void *handle)
548 {
549 	unsigned i;
550 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
551 
552 	for (i = 0; i < adev->usec_timeout; i++)
553 		if (vce_v3_0_is_idle(handle))
554 			return 0;
555 
556 	return -ETIMEDOUT;
557 }
558 
559 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
560 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
561 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
562 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
563 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
564 
565 static bool vce_v3_0_check_soft_reset(void *handle)
566 {
567 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
568 	u32 srbm_soft_reset = 0;
569 
570 	/* According to the VCE team, we should use VCE_STATUS instead of
571 	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
572 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
573 	 * instance's registers are accessed
574 	 * (0 for 1st instance, 10 for 2nd instance).
575 	 *
576 	 * VCE_STATUS
577 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
578 	 * |----+----+-----------+----+----+----+----------+---------+----|
579 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
580 	 *
581 	 * The VCE team suggests using bits 3 to 6 for the busy status check.
582 	 */
583 	mutex_lock(&adev->grbm_idx_mutex);
584 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
585 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
586 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
587 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
588 	}
589 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
590 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
591 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
592 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
593 	}
594 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
595 	mutex_unlock(&adev->grbm_idx_mutex);
596 
597 	if (srbm_soft_reset) {
598 		adev->vce.srbm_soft_reset = srbm_soft_reset;
599 		return true;
600 	} else {
601 		adev->vce.srbm_soft_reset = 0;
602 		return false;
603 	}
604 }
605 
606 static int vce_v3_0_soft_reset(void *handle)
607 {
608 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
609 	u32 srbm_soft_reset;
610 
611 	if (!adev->vce.srbm_soft_reset)
612 		return 0;
613 	srbm_soft_reset = adev->vce.srbm_soft_reset;
614 
615 	if (srbm_soft_reset) {
616 		u32 tmp;
617 
618 		tmp = RREG32(mmSRBM_SOFT_RESET);
619 		tmp |= srbm_soft_reset;
620 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
621 		WREG32(mmSRBM_SOFT_RESET, tmp);
622 		tmp = RREG32(mmSRBM_SOFT_RESET);
623 
624 		udelay(50);
625 
626 		tmp &= ~srbm_soft_reset;
627 		WREG32(mmSRBM_SOFT_RESET, tmp);
628 		tmp = RREG32(mmSRBM_SOFT_RESET);
629 
630 		/* Wait a little for things to settle down */
631 		udelay(50);
632 	}
633 
634 	return 0;
635 }
636 
637 static int vce_v3_0_pre_soft_reset(void *handle)
638 {
639 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
640 
641 	if (!adev->vce.srbm_soft_reset)
642 		return 0;
643 
644 	mdelay(5);
645 
646 	return vce_v3_0_suspend(adev);
647 }
648 
649 
650 static int vce_v3_0_post_soft_reset(void *handle)
651 {
652 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
653 
654 	if (!adev->vce.srbm_soft_reset)
655 		return 0;
656 
657 	mdelay(5);
658 
659 	return vce_v3_0_resume(adev);
660 }
661 
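/**
 * vce_v3_0_set_interrupt_state - enable or disable the VCE system interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: interrupt state to program
 */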
662 static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
663 					struct amdgpu_irq_src *source,
664 					unsigned type,
665 					enum amdgpu_interrupt_state state)
666 {
667 	uint32_t val = 0;
668 
669 	if (state == AMDGPU_IRQ_STATE_ENABLE)
670 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
671 
672 	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
673 	return 0;
674 }
675 
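/**
 * vce_v3_0_process_interrupt - dispatch a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledges the trap interrupt and signals fence processing on the ring
 * identified by the source data.
 */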
676 static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
677 				      struct amdgpu_irq_src *source,
678 				      struct amdgpu_iv_entry *entry)
679 {
680 	DRM_DEBUG("IH: VCE\n");
681 
682 	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);
683 
684 	switch (entry->src_data[0]) {
685 	case 0:
686 	case 1:
687 	case 2:
688 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
689 		break;
690 	default:
691 		DRM_ERROR("Unhandled interrupt: %d %d\n",
692 			  entry->src_id, entry->src_data[0]);
693 		break;
694 	}
695 
696 	return 0;
697 }
698 
699 static int vce_v3_0_set_clockgating_state(void *handle,
700 					  enum amd_clockgating_state state)
701 {
702 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
703 	bool enable = (state == AMD_CG_STATE_GATE);
704 	int i;
705 
706 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
707 		return 0;
708 
709 	mutex_lock(&adev->grbm_idx_mutex);
710 	for (i = 0; i < 2; i++) {
711 		/* Program VCE Instance 0 or 1 if not harvested */
712 		if (adev->vce.harvest_config & (1 << i))
713 			continue;
714 
715 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));
716 
717 		if (!enable) {
718 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
719 			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
720 			data &= ~(0xf | 0xff0);
721 			data |= ((0x0 << 0) | (0x04 << 4));
722 			WREG32(mmVCE_CLOCK_GATING_A, data);
723 
724 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
725 			data = RREG32(mmVCE_UENC_CLOCK_GATING);
726 			data &= ~(0xf | 0xff0);
727 			data |= ((0x0 << 0) | (0x04 << 4));
728 			WREG32(mmVCE_UENC_CLOCK_GATING, data);
729 		}
730 
731 		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
732 	}
733 
734 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
735 	mutex_unlock(&adev->grbm_idx_mutex);
736 
737 	return 0;
738 }
739 
740 static int vce_v3_0_set_powergating_state(void *handle,
741 					  enum amd_powergating_state state)
742 {
743 	/* This doesn't actually powergate the VCE block.
744 	 * That's done in the dpm code via the SMC.  This
745 	 * just re-inits the block as necessary.  The actual
746 	 * gating still happens in the dpm code.  We should
747 	 * revisit this when there is a cleaner line between
748 	 * the SMC and the hw blocks.
749 	 */
750 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
751 	int ret = 0;
752 
753 	if (state == AMD_PG_STATE_GATE) {
754 		ret = vce_v3_0_stop(adev);
755 		if (ret)
756 			goto out;
757 	} else {
758 		ret = vce_v3_0_start(adev);
759 		if (ret)
760 			goto out;
761 	}
762 
763 out:
764 	return ret;
765 }
766 
767 static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
768 {
769 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
770 	int data;
771 
772 	mutex_lock(&adev->pm.mutex);
773 
774 	if (adev->flags & AMD_IS_APU)
775 		data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
776 	else
777 		data = RREG32_SMC(ixCURRENT_PG_STATUS);
778 
779 	if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
780 		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
781 		goto out;
782 	}
783 
784 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
785 
786 	/* AMD_CG_SUPPORT_VCE_MGCG */
787 	data = RREG32(mmVCE_CLOCK_GATING_A);
788 	if (data & (0x04 << 4))
789 		*flags |= AMD_CG_SUPPORT_VCE_MGCG;
790 
791 out:
792 	mutex_unlock(&adev->pm.mutex);
793 }
794 
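/**
 * vce_v3_0_ring_emit_ib - emit an indirect buffer in VM mode
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VM id of the job
 * @ctx_switch: unused for VCE
 *
 * Emits the five dword VCE_CMD_IB_VM packet.
 */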
795 static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
796 		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
797 {
798 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
799 	amdgpu_ring_write(ring, vm_id);
800 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
801 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
802 	amdgpu_ring_write(ring, ib->length_dw);
803 }
804 
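/**
 * vce_v3_0_emit_vm_flush - flush the VM TLB from the VCE ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id to flush
 * @pd_addr: page directory address
 *
 * Updates the page table base and flushes the TLB via ring commands.
 */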
805 static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
806 			 unsigned int vm_id, uint64_t pd_addr)
807 {
808 	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
809 	amdgpu_ring_write(ring, vm_id);
810 	amdgpu_ring_write(ring, pd_addr >> 12);
811 
812 	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
813 	amdgpu_ring_write(ring, vm_id);
814 	amdgpu_ring_write(ring, VCE_CMD_END);
815 }
816 
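/**
 * vce_v3_0_emit_pipeline_sync - wait for the last synced fence
 *
 * @ring: amdgpu_ring pointer
 *
 * Emits a WAIT_GE command on the fence address so the ring stalls until the
 * last synced sequence number has signaled.
 */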
817 static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
818 {
819 	uint32_t seq = ring->fence_drv.sync_seq;
820 	uint64_t addr = ring->fence_drv.gpu_addr;
821 
822 	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
823 	amdgpu_ring_write(ring, lower_32_bits(addr));
824 	amdgpu_ring_write(ring, upper_32_bits(addr));
825 	amdgpu_ring_write(ring, seq);
826 }
827 
828 static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
829 	.name = "vce_v3_0",
830 	.early_init = vce_v3_0_early_init,
831 	.late_init = NULL,
832 	.sw_init = vce_v3_0_sw_init,
833 	.sw_fini = vce_v3_0_sw_fini,
834 	.hw_init = vce_v3_0_hw_init,
835 	.hw_fini = vce_v3_0_hw_fini,
836 	.suspend = vce_v3_0_suspend,
837 	.resume = vce_v3_0_resume,
838 	.is_idle = vce_v3_0_is_idle,
839 	.wait_for_idle = vce_v3_0_wait_for_idle,
840 	.check_soft_reset = vce_v3_0_check_soft_reset,
841 	.pre_soft_reset = vce_v3_0_pre_soft_reset,
842 	.soft_reset = vce_v3_0_soft_reset,
843 	.post_soft_reset = vce_v3_0_post_soft_reset,
844 	.set_clockgating_state = vce_v3_0_set_clockgating_state,
845 	.set_powergating_state = vce_v3_0_set_powergating_state,
846 	.get_clockgating_state = vce_v3_0_get_clockgating_state,
847 };
848 
849 static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
850 	.type = AMDGPU_RING_TYPE_VCE,
851 	.align_mask = 0xf,
852 	.nop = VCE_CMD_NO_OP,
853 	.support_64bit_ptrs = false,
854 	.get_rptr = vce_v3_0_ring_get_rptr,
855 	.get_wptr = vce_v3_0_ring_get_wptr,
856 	.set_wptr = vce_v3_0_ring_set_wptr,
857 	.parse_cs = amdgpu_vce_ring_parse_cs,
858 	.emit_frame_size =
859 		4 + /* vce_v3_0_emit_pipeline_sync */
860 		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
861 	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
862 	.emit_ib = amdgpu_vce_ring_emit_ib,
863 	.emit_fence = amdgpu_vce_ring_emit_fence,
864 	.test_ring = amdgpu_vce_ring_test_ring,
865 	.test_ib = amdgpu_vce_ring_test_ib,
866 	.insert_nop = amdgpu_ring_insert_nop,
867 	.pad_ib = amdgpu_ring_generic_pad_ib,
868 	.begin_use = amdgpu_vce_ring_begin_use,
869 	.end_use = amdgpu_vce_ring_end_use,
870 };
871 
872 static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
873 	.type = AMDGPU_RING_TYPE_VCE,
874 	.align_mask = 0xf,
875 	.nop = VCE_CMD_NO_OP,
876 	.support_64bit_ptrs = false,
877 	.get_rptr = vce_v3_0_ring_get_rptr,
878 	.get_wptr = vce_v3_0_ring_get_wptr,
879 	.set_wptr = vce_v3_0_ring_set_wptr,
880 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
881 	.emit_frame_size =
882 		6 + /* vce_v3_0_emit_vm_flush */
883 		4 + /* vce_v3_0_emit_pipeline_sync */
884 		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
885 	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
886 	.emit_ib = vce_v3_0_ring_emit_ib,
887 	.emit_vm_flush = vce_v3_0_emit_vm_flush,
888 	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
889 	.emit_fence = amdgpu_vce_ring_emit_fence,
890 	.test_ring = amdgpu_vce_ring_test_ring,
891 	.test_ib = amdgpu_vce_ring_test_ib,
892 	.insert_nop = amdgpu_ring_insert_nop,
893 	.pad_ib = amdgpu_ring_generic_pad_ib,
894 	.begin_use = amdgpu_vce_ring_begin_use,
895 	.end_use = amdgpu_vce_ring_end_use,
896 };
897 
898 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
899 {
900 	int i;
901 
902 	if (adev->asic_type >= CHIP_STONEY) {
903 		for (i = 0; i < adev->vce.num_rings; i++)
904 			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
905 		DRM_INFO("VCE enabled in VM mode\n");
906 	} else {
907 		for (i = 0; i < adev->vce.num_rings; i++)
908 			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
909 		DRM_INFO("VCE enabled in physical mode\n");
910 	}
911 }
912 
913 static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
914 	.set = vce_v3_0_set_interrupt_state,
915 	.process = vce_v3_0_process_interrupt,
916 };
917 
918 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
919 {
920 	adev->vce.irq.num_types = 1;
921 	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
922 }
923 
924 const struct amdgpu_ip_block_version vce_v3_0_ip_block =
925 {
926 	.type = AMD_IP_BLOCK_TYPE_VCE,
927 	.major = 3,
928 	.minor = 0,
929 	.rev = 0,
930 	.funcs = &vce_v3_0_ip_funcs,
931 };
932 
933 const struct amdgpu_ip_block_version vce_v3_1_ip_block =
934 {
935 	.type = AMD_IP_BLOCK_TYPE_VCE,
936 	.major = 3,
937 	.minor = 1,
938 	.rev = 0,
939 	.funcs = &vce_v3_0_ip_funcs,
940 };
941 
942 const struct amdgpu_ip_block_version vce_v3_4_ip_block =
943 {
944 	.type = AMD_IP_BLOCK_TYPE_VCE,
945 	.major = 3,
946 	.minor = 4,
947 	.rev = 0,
948 	.funcs = &vce_v3_0_ip_funcs,
949 };
950