/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"

#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_RPTR);
	else
		return RREG32(mmVCE_RB_RPTR2);
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_WPTR);
	else
		return RREG32(mmVCE_RB_WPTR2);
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		WREG32(mmVCE_RB_WPTR, ring->wptr);
	else
		WREG32(mmVCE_RB_WPTR2, ring->wptr);
}

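/**
 * vce_v3_0_override_vce_clock_gating - toggle the clock gating override
 *
 * @adev: amdgpu_device pointer
 * @override: true to hold the override, false to release it
 *
 * Sets or clears the VCE_CGTT_OVERRIDE bit in VCE_RB_ARB_CTRL; the override
 * is held while the software clock gating registers are programmed.
 */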
static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(mmVCE_RB_ARB_CTRL);
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(mmVCE_RB_ARB_CTRL, data);
}

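/**
 * vce_v3_0_set_vce_sw_clock_gating - program the software clock gating registers
 *
 * @adev: amdgpu_device pointer
 * @gated: true to put the clocks into the gated state, false to force them on
 *
 * Programs VCE_CLOCK_GATING_B and the VCE_UENC_*CLOCK_GATING registers for
 * the requested state while the clock gating override is held, then releases
 * the override again.
 */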
static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 tmp, data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		tmp = data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		if (tmp != data)
			WREG32(mmVCE_CLOCK_GATING_B, data);

		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING, data);

		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		tmp = data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		if (tmp != data)
			WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		if (tmp != data)
			WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		tmp = data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		if (tmp != data)
			WREG32(mmVCE_CLOCK_GATING_B, data);

		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING, data);

		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		tmp = data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0xffc00000;
		if (tmp != data)
			WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		if (tmp != data)
			WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

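/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to report ready
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the FW_LOADED bit, resetting the ECPU and retrying
 * if the firmware does not come up.  Returns 0 on success, -ETIMEDOUT if
 * the firmware never reports as loaded.
 */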
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(mmVCE_SOFT_RESET,
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(mmVCE_SOFT_RESET, 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, ring->wptr);
	WREG32(mmVCE_RB_WPTR, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, ring->wptr);
	WREG32(mmVCE_RB_WPTR2, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		if (idx == 0)
			WREG32_P(mmGRBM_GFX_INDEX, 0,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
		else
			WREG32_P(mmGRBM_GFX_INDEX,
				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);

		vce_v3_0_mc_resume(adev, idx);

		WREG32_P(mmVCE_STATUS, VCE_STATUS__JOB_BUSY_MASK,
			 ~VCE_STATUS__JOB_BUSY_MASK);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
				~VCE_VCPU_CNTL__CLK_EN_MASK);

		WREG32_P(mmVCE_SOFT_RESET, 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

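/**
 * vce_v3_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * For each instance that is not harvested: disable the VCPU clock, put the
 * ECPU into soft reset, clear the busy flag and switch software clock
 * gating off if MGCG is supported.
 */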
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		if (idx == 0)
			WREG32_P(mmGRBM_GFX_INDEX, 0,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
		else
			WREG32_P(mmGRBM_GFX_INDEX,
				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_P(mmVCE_VCPU_CNTL, 0,
				~VCE_VCPU_CNTL__CLK_EN_MASK);
		/* hold the ECPU in soft reset */
		WREG32_P(mmVCE_SOFT_RESET,
			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

		/* clear BUSY flag */
		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);

		/* Set Clock-Gating off */
		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
			vce_v3_0_set_vce_sw_clock_gating(adev, false);
	}

	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

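/**
 * vce_v3_0_get_harvest_config - determine which VCE instances are usable
 *
 * @adev: amdgpu_device pointer
 *
 * Single-pipe parts (Fiji, Stoney, Polaris10/11) always report VCE1 as
 * harvested; on the other parts the harvest fuses are read to work out
 * which instances, if any, are disabled.
 */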
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	/* Fiji, Stoney, Polaris10, Polaris11 are single pipe */
	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY) ||
	    (adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_POLARIS11))
		return AMDGPU_VCE_HARVEST_VCE1;

	/* Tonga and CZ are dual or single pipe */
	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		return 0;
	}
}

static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r;

	/* VCE */
	r = amdgpu_irq_add_id(adev, 167, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	ring = &adev->vce.ring[0];
	sprintf(ring->name, "vce0");
	r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf,
			     &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE);
	if (r)
		return r;

	ring = &adev->vce.ring[1];
	sprintf(ring->name, "vce1");
	r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf,
			     &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vce_sw_fini(adev);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_start(adev);
	if (r)
		return r;

	adev->vce.ring[0].ready = false;
	adev->vce.ring[1].ready = false;

	for (i = 0; i < 2; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	return vce_v3_0_stop(adev);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	r = vce_v3_0_hw_init(adev);
	if (r)
		return r;

	return r;
}

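/**
 * vce_v3_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance to program
 *
 * Sets up the LMI registers and points the VCPU caches at the firmware,
 * stack and data regions within the VCE memory at adev->vce.gpu_addr for
 * the selected instance.
 */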
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);

	WREG32_P(mmVCE_SYS_INT_EN, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

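/**
 * vce_v3_0_is_idle - check whether the VCE block is idle
 *
 * @handle: amdgpu_device pointer
 *
 * Returns true if no non-harvested VCE instance reports busy in
 * SRBM_STATUS2.
 */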
static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

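/**
 * vce_v3_0_check_soft_reset - check whether VCE needs a soft reset
 *
 * @handle: amdgpu_device pointer
 *
 * Reads VCE_STATUS for both instances and records the SRBM soft reset bits
 * that would be required.  Currently short-circuited (returns 0 right away)
 * because VCE always reports busy here.
 */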
static int vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;
	u32 tmp;

	/* VCE BUG: it is always busy, so skip its checking now */
	return 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for the 1st instance, 0x10 for the 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	tmp = RREG32(mmGRBM_GFX_INDEX);
	tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	WREG32(mmGRBM_GFX_INDEX, tmp);
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	tmp = RREG32(mmGRBM_GFX_INDEX);
	tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	WREG32(mmGRBM_GFX_INDEX, tmp);
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	tmp = RREG32(mmGRBM_GFX_INDEX);
	tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	WREG32(mmGRBM_GFX_INDEX, tmp);

	if (srbm_soft_reset) {
		adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = true;
		adev->vce.srbm_soft_reset = srbm_soft_reset;
	} else {
		adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = false;
		adev->vce.srbm_soft_reset = 0;
	}
	return 0;
}

static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

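/**
 * vce_v3_0_set_interrupt_state - enable/disable the VCE system interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 *
 * Toggles the trap interrupt enable bit in VCE_SYS_INT_EN.
 */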
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

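/**
 * vce_v3_0_process_interrupt - handle a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledges the trap interrupt and processes the fences of the ring
 * selected by the entry's source data.
 */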
static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_P(mmVCE_SYS_INT_STATUS,
		VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
		~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

	switch (entry->src_data) {
	case 0:
	case 1:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data);
		break;
	}

	return 0;
}

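/**
 * vce_v3_set_bypass_mode - toggle the ECLK DFS bypass
 *
 * @adev: amdgpu_device pointer
 * @enable: true to bypass the encoder clock DFS, false to use it
 *
 * Sets or clears the BYPASSECLK bit in GCK_DFS_BYPASS_CNTL via the SMC.
 */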
static void vce_v3_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

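/**
 * vce_v3_0_set_clockgating_state - enable or disable VCE clock gating
 *
 * @handle: amdgpu_device pointer
 * @state: requested clock gating state
 *
 * Programs the ECLK bypass on Polaris10 and, if MGCG is supported, walks
 * the non-harvested instances, initializes the clock on/off delays when
 * gating and applies the software clock gating sequence.
 */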
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if (adev->asic_type == CHIP_POLARIS10)
		vce_v3_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		if (i == 0)
			WREG32_P(mmGRBM_GFX_INDEX, 0,
					~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
		else
			WREG32_P(mmGRBM_GFX_INDEX,
					GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
					~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v3_0_stop()? */
		return 0;
	else
		return vce_v3_0_start(adev);
}

const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	adev->vce.ring[0].funcs = &vce_v3_0_ring_funcs;
	adev->vce.ring[1].funcs = &vce_v3_0_ring_funcs;
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}