xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c (revision 9c0171b4)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  * Authors: Christian König <christian.koenig@amd.com>
26  */
27 
28 #include <linux/firmware.h>
29 #include <drm/drmP.h>
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "vid.h"
33 #include "vce/vce_3_0_d.h"
34 #include "vce/vce_3_0_sh_mask.h"
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 #include "gca/gfx_8_0_d.h"
38 #include "smu/smu_7_1_2_d.h"
39 #include "smu/smu_7_1_2_sh_mask.h"
41 #include "gca/gfx_8_0_sh_mask.h"
42 
43 
44 #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
45 #define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
46 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
47 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
48 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
49 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
50 
51 #define VCE_V3_0_FW_SIZE	(384 * 1024)
52 #define VCE_V3_0_STACK_SIZE	(64 * 1024)
53 #define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
54 
55 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
56 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
57 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
58 static int vce_v3_0_wait_for_idle(void *handle);
59 
60 /**
61  * vce_v3_0_ring_get_rptr - get read pointer
62  *
63  * @ring: amdgpu_ring pointer
64  *
65  * Returns the current hardware read pointer
66  */
67 static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
68 {
69 	struct amdgpu_device *adev = ring->adev;
70 
71 	if (ring == &adev->vce.ring[0])
72 		return RREG32(mmVCE_RB_RPTR);
73 	else
74 		return RREG32(mmVCE_RB_RPTR2);
75 }
76 
77 /**
78  * vce_v3_0_ring_get_wptr - get write pointer
79  *
80  * @ring: amdgpu_ring pointer
81  *
82  * Returns the current hardware write pointer
83  */
84 static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
85 {
86 	struct amdgpu_device *adev = ring->adev;
87 
88 	if (ring == &adev->vce.ring[0])
89 		return RREG32(mmVCE_RB_WPTR);
90 	else
91 		return RREG32(mmVCE_RB_WPTR2);
92 }
93 
94 /**
95  * vce_v3_0_ring_set_wptr - set write pointer
96  *
97  * @ring: amdgpu_ring pointer
98  *
99  * Commits the write pointer to the hardware
100  */
101 static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
102 {
103 	struct amdgpu_device *adev = ring->adev;
104 
105 	if (ring == &adev->vce.ring[0])
106 		WREG32(mmVCE_RB_WPTR, ring->wptr);
107 	else
108 		WREG32(mmVCE_RB_WPTR2, ring->wptr);
109 }
110 
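/**
 * vce_v3_0_override_vce_clock_gating - set the VCE clock gating override
 *
 * @adev: amdgpu_device pointer
 * @override: enable or disable the CGTT override
 *
 * Set or clear the VCE_CGTT_OVERRIDE bit in VCE_RB_ARB_CTRL so that
 * clock gating can be held off while the gating registers are
 * reprogrammed.
 */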
111 static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
112 {
113 	u32 tmp, data;
114 
115 	tmp = data = RREG32(mmVCE_RB_ARB_CTRL);
116 	if (override)
117 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
118 	else
119 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
120 
121 	if (tmp != data)
122 		WREG32(mmVCE_RB_ARB_CTRL, data);
123 }
124 
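/**
 * vce_v3_0_set_vce_sw_clock_gating - program the VCE software clock gating
 *
 * @adev: amdgpu_device pointer
 * @gated: true to put the clocks into the gated state
 *
 * Program VCE_CLOCK_GATING_B and the VCE_UENC_* gating registers for
 * either the gated or the ungated state, keeping the CGTT override set
 * while the registers are updated.
 */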
125 static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
126 					     bool gated)
127 {
128 	u32 tmp, data;
129 
130 	/* Set Override to disable Clock Gating */
131 	vce_v3_0_override_vce_clock_gating(adev, true);
132 
133 	/* This function enables MGCG which is controlled by firmware.
134 	 * With the clocks in the gated state the core is still
135 	 * accessible but the firmware will throttle the clocks on the
136 	 * fly as necessary.
137 	 */
138 	if (gated) {
139 		tmp = data = RREG32(mmVCE_CLOCK_GATING_B);
140 		data |= 0x1ff;
141 		data &= ~0xef0000;
142 		if (tmp != data)
143 			WREG32(mmVCE_CLOCK_GATING_B, data);
144 
145 		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING);
146 		data |= 0x3ff000;
147 		data &= ~0xffc00000;
148 		if (tmp != data)
149 			WREG32(mmVCE_UENC_CLOCK_GATING, data);
150 
151 		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
152 		data |= 0x2;
153 		data &= ~0x00010000;
154 		if (tmp != data)
155 			WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
156 
157 		tmp = data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
158 		data |= 0x37f;
159 		if (tmp != data)
160 			WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
161 
162 		tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
163 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
164 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
165 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
166 			0x8;
167 		if (tmp != data)
168 			WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
169 	} else {
170 		tmp = data = RREG32(mmVCE_CLOCK_GATING_B);
171 		data &= ~0x80010;
172 		data |= 0xe70008;
173 		if (tmp != data)
174 			WREG32(mmVCE_CLOCK_GATING_B, data);
175 
176 		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING);
177 		data |= 0xffc00000;
178 		if (tmp != data)
179 			WREG32(mmVCE_UENC_CLOCK_GATING, data);
180 
181 		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
182 		data |= 0x10000;
183 		if (tmp != data)
184 			WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
185 
186 		tmp = data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
187 		data &= ~0xffc00000;
188 		if (tmp != data)
189 			WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
190 
191 		tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
192 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
193 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
194 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
195 			  0x8);
196 		if (tmp != data)
197 			WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
198 	}
199 	vce_v3_0_override_vce_clock_gating(adev, false);
200 }
201 
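/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to report ready
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the FW_LOADED bit, resetting the ECPU between
 * attempts.  Returns 0 on success or -ETIMEDOUT if the firmware never
 * reports as loaded.
 */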
202 static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
203 {
204 	int i, j;
205 
206 	for (i = 0; i < 10; ++i) {
207 		for (j = 0; j < 100; ++j) {
208 			uint32_t status = RREG32(mmVCE_STATUS);
209 
210 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
211 				return 0;
212 			mdelay(10);
213 		}
214 
215 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
216 		WREG32_P(mmVCE_SOFT_RESET,
217 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
218 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
219 		mdelay(10);
220 		WREG32_P(mmVCE_SOFT_RESET, 0,
221 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
222 		mdelay(10);
223 	}
224 
225 	return -ETIMEDOUT;
226 }
227 
228 /**
229  * vce_v3_0_start - start VCE block
230  *
231  * @adev: amdgpu_device pointer
232  *
233  * Set up and start the VCE block
234  */
235 static int vce_v3_0_start(struct amdgpu_device *adev)
236 {
237 	struct amdgpu_ring *ring;
238 	int idx, r;
239 
240 	ring = &adev->vce.ring[0];
241 	WREG32(mmVCE_RB_RPTR, ring->wptr);
242 	WREG32(mmVCE_RB_WPTR, ring->wptr);
243 	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
244 	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
245 	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
246 
247 	ring = &adev->vce.ring[1];
248 	WREG32(mmVCE_RB_RPTR2, ring->wptr);
249 	WREG32(mmVCE_RB_WPTR2, ring->wptr);
250 	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
251 	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
252 	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
253 
254 	mutex_lock(&adev->grbm_idx_mutex);
255 	for (idx = 0; idx < 2; ++idx) {
256 		if (adev->vce.harvest_config & (1 << idx))
257 			continue;
258 
259 		if (idx == 0)
260 			WREG32_P(mmGRBM_GFX_INDEX, 0,
261 				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
262 		else
263 			WREG32_P(mmGRBM_GFX_INDEX,
264 				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
265 				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
266 
267 		vce_v3_0_mc_resume(adev, idx);
268 
269 		WREG32_P(mmVCE_STATUS, VCE_STATUS__JOB_BUSY_MASK,
270 		         ~VCE_STATUS__JOB_BUSY_MASK);
271 
272 		if (adev->asic_type >= CHIP_STONEY)
273 			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
274 		else
275 			WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
276 				~VCE_VCPU_CNTL__CLK_EN_MASK);
277 
278 		WREG32_P(mmVCE_SOFT_RESET, 0,
279 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
280 
281 		mdelay(100);
282 
283 		r = vce_v3_0_firmware_loaded(adev);
284 
285 		/* clear BUSY flag */
286 		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);
287 
288 		if (r) {
289 			DRM_ERROR("VCE not responding, giving up!!!\n");
290 			mutex_unlock(&adev->grbm_idx_mutex);
291 			return r;
292 		}
293 	}
294 
295 	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
296 	mutex_unlock(&adev->grbm_idx_mutex);
297 
298 	return 0;
299 }
300 
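/**
 * vce_v3_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * For each instance that is not harvested, disable the VCPU clock,
 * hold the ECPU in soft reset, clear the BUSY flag and, if VCE MGCG is
 * supported, leave the software clock gating in the ungated state.
 */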
301 static int vce_v3_0_stop(struct amdgpu_device *adev)
302 {
303 	int idx;
304 
305 	mutex_lock(&adev->grbm_idx_mutex);
306 	for (idx = 0; idx < 2; ++idx) {
307 		if (adev->vce.harvest_config & (1 << idx))
308 			continue;
309 
310 		if (idx == 0)
311 			WREG32_P(mmGRBM_GFX_INDEX, 0,
312 				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
313 		else
314 			WREG32_P(mmGRBM_GFX_INDEX,
315 				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
316 				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
317 
318 		if (adev->asic_type >= CHIP_STONEY)
319 			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
320 		else
321 			WREG32_P(mmVCE_VCPU_CNTL, 0,
322 				~VCE_VCPU_CNTL__CLK_EN_MASK);
323 		/* hold the ECPU in soft reset */
324 		WREG32_P(mmVCE_SOFT_RESET,
325 			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
326 			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
327 
328 		/* clear BUSY flag */
329 		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);
330 
331 		/* Set Clock-Gating off */
332 		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
333 			vce_v3_0_set_vce_sw_clock_gating(adev, false);
334 	}
335 
336 	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
337 	mutex_unlock(&adev->grbm_idx_mutex);
338 
339 	return 0;
340 }
341 
342 #define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
343 #define VCE_HARVEST_FUSE_MACRO__SHIFT       27
344 #define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000
345 
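/**
 * vce_v3_0_get_harvest_config - determine which VCE instances are harvested
 *
 * @adev: amdgpu_device pointer
 *
 * Single pipe ASICs always report VCE1 as harvested; on the other ASICs
 * the harvest fuses are read through the SMC to work out which VCE
 * instances are disabled.
 */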
346 static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
347 {
348 	u32 tmp;
349 
350 	/* Fiji, Stoney, Polaris10, Polaris11 are single pipe */
351 	if ((adev->asic_type == CHIP_FIJI) ||
352 	    (adev->asic_type == CHIP_STONEY) ||
353 	    (adev->asic_type == CHIP_POLARIS10) ||
354 	    (adev->asic_type == CHIP_POLARIS11))
355 		return AMDGPU_VCE_HARVEST_VCE1;
356 
357 	/* Tonga and CZ are dual or single pipe */
358 	if (adev->flags & AMD_IS_APU)
359 		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
360 		       VCE_HARVEST_FUSE_MACRO__MASK) >>
361 			VCE_HARVEST_FUSE_MACRO__SHIFT;
362 	else
363 		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
364 		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
365 			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;
366 
367 	switch (tmp) {
368 	case 1:
369 		return AMDGPU_VCE_HARVEST_VCE0;
370 	case 2:
371 		return AMDGPU_VCE_HARVEST_VCE1;
372 	case 3:
373 		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
374 	default:
375 		return 0;
376 	}
377 }
378 
379 static int vce_v3_0_early_init(void *handle)
380 {
381 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
382 
383 	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
384 
385 	if ((adev->vce.harvest_config &
386 	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
387 	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
388 		return -ENOENT;
389 
390 	vce_v3_0_set_ring_funcs(adev);
391 	vce_v3_0_set_irq_funcs(adev);
392 
393 	return 0;
394 }
395 
396 static int vce_v3_0_sw_init(void *handle)
397 {
398 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
399 	struct amdgpu_ring *ring;
400 	int r;
401 
402 	/* VCE */
403 	r = amdgpu_irq_add_id(adev, 167, &adev->vce.irq);
404 	if (r)
405 		return r;
406 
407 	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
408 		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
409 	if (r)
410 		return r;
411 
412 	r = amdgpu_vce_resume(adev);
413 	if (r)
414 		return r;
415 
416 	ring = &adev->vce.ring[0];
417 	sprintf(ring->name, "vce0");
418 	r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf,
419 			     &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE);
420 	if (r)
421 		return r;
422 
423 	ring = &adev->vce.ring[1];
424 	sprintf(ring->name, "vce1");
425 	r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf,
426 			     &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE);
427 	if (r)
428 		return r;
429 
430 	return r;
431 }
432 
433 static int vce_v3_0_sw_fini(void *handle)
434 {
435 	int r;
436 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
437 
438 	r = amdgpu_vce_suspend(adev);
439 	if (r)
440 		return r;
441 
442 	r = amdgpu_vce_sw_fini(adev);
443 	if (r)
444 		return r;
445 
446 	return r;
447 }
448 
449 static int vce_v3_0_hw_init(void *handle)
450 {
451 	int r, i;
452 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
453 
454 	r = vce_v3_0_start(adev);
455 	if (r)
456 		return r;
457 
458 	adev->vce.ring[0].ready = false;
459 	adev->vce.ring[1].ready = false;
460 
461 	for (i = 0; i < 2; i++) {
462 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
463 		if (r)
464 			return r;
465 		else
466 			adev->vce.ring[i].ready = true;
467 	}
468 
469 	DRM_INFO("VCE initialized successfully.\n");
470 
471 	return 0;
472 }
473 
474 static int vce_v3_0_hw_fini(void *handle)
475 {
476 	int r;
477 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
478 
479 	r = vce_v3_0_wait_for_idle(handle);
480 	if (r)
481 		return r;
482 
483 	return vce_v3_0_stop(adev);
484 }
485 
486 static int vce_v3_0_suspend(void *handle)
487 {
488 	int r;
489 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
490 
491 	r = vce_v3_0_hw_fini(adev);
492 	if (r)
493 		return r;
494 
495 	r = amdgpu_vce_suspend(adev);
496 	if (r)
497 		return r;
498 
499 	return r;
500 }
501 
502 static int vce_v3_0_resume(void *handle)
503 {
504 	int r;
505 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
506 
507 	r = amdgpu_vce_resume(adev);
508 	if (r)
509 		return r;
510 
511 	r = vce_v3_0_hw_init(adev);
512 	if (r)
513 		return r;
514 
515 	return r;
516 }
517 
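/**
 * vce_v3_0_mc_resume - program the VCE memory controller interface
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Program the LMI registers, the VCPU cache base address and the
 * firmware/stack/data cache offsets and sizes for the selected
 * instance, then enable the VCE system interrupt trap.
 */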
518 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
519 {
520 	uint32_t offset, size;
521 
522 	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
523 	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
524 	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
525 	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);
526 
527 	WREG32(mmVCE_LMI_CTRL, 0x00398000);
528 	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
529 	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
530 	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
531 	WREG32(mmVCE_LMI_VM_CTRL, 0);
532 	if (adev->asic_type >= CHIP_STONEY) {
533 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
534 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
535 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
536 	} else
537 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
538 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
539 	size = VCE_V3_0_FW_SIZE;
540 	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
541 	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
542 
543 	if (idx == 0) {
544 		offset += size;
545 		size = VCE_V3_0_STACK_SIZE;
546 		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
547 		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
548 		offset += size;
549 		size = VCE_V3_0_DATA_SIZE;
550 		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
551 		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
552 	} else {
553 		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
554 		size = VCE_V3_0_STACK_SIZE;
555 		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
556 		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
557 		offset += size;
558 		size = VCE_V3_0_DATA_SIZE;
559 		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
560 		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
561 	}
562 
563 	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
564 
565 	WREG32_P(mmVCE_SYS_INT_EN, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
566 		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
567 }
568 
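/**
 * vce_v3_0_is_idle - check whether the VCE block is idle
 *
 * @handle: amdgpu_device pointer
 *
 * Check the SRBM_STATUS2 busy bits for the instances that are not
 * harvested.
 */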
569 static bool vce_v3_0_is_idle(void *handle)
570 {
571 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
572 	u32 mask = 0;
573 
574 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
575 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
576 
577 	return !(RREG32(mmSRBM_STATUS2) & mask);
578 }
579 
580 static int vce_v3_0_wait_for_idle(void *handle)
581 {
582 	unsigned i;
583 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
584 
585 	for (i = 0; i < adev->usec_timeout; i++)
586 		if (vce_v3_0_is_idle(handle))
587 			return 0;
588 
589 	return -ETIMEDOUT;
590 }
591 
592 #define AMDGPU_VCE_STATUS_BUSY_MASK    0x78
593 
594 static int vce_v3_0_check_soft_reset(void *handle)
595 {
596 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
597 	u32 srbm_soft_reset = 0;
598 	u32 tmp;
599 
600 	/* VCE BUG: it is always busy, so skip the busy check for now */
601 	return 0;
602 
603 	/* According to the VCE team, we should use VCE_STATUS instead of
604 	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
605 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
606 	 * instance's registers are accessed
607 	 * (0 for the 1st instance, 0x10 for the 2nd instance).
608 	 *
609 	 * VCE_STATUS
610 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
611 	 * |----+----+-----------+----+----+----+----------+---------+----|
612 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
613 	 *
614 	 * The VCE team suggests using bits 3--6 for the busy status check.
615 	 */
616 	tmp = RREG32(mmGRBM_GFX_INDEX);
617 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
618 	WREG32(mmGRBM_GFX_INDEX, tmp);
619 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
620 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
621 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
622 	}
623 	tmp = RREG32(mmGRBM_GFX_INDEX);
624 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
625 	WREG32(mmGRBM_GFX_INDEX, tmp);
626 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
627 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
628 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
629 	}
630 	tmp = RREG32(mmGRBM_GFX_INDEX);
631 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
632 	WREG32(mmGRBM_GFX_INDEX, tmp);
633 
634 	if (srbm_soft_reset) {
635 		adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = true;
636 		adev->vce.srbm_soft_reset = srbm_soft_reset;
637 	} else {
638 		adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = false;
639 		adev->vce.srbm_soft_reset = 0;
640 	}
641 	return 0;
642 }
643 
644 static int vce_v3_0_soft_reset(void *handle)
645 {
646 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
647 	u32 srbm_soft_reset;
648 
649 	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
650 		return 0;
651 	srbm_soft_reset = adev->vce.srbm_soft_reset;
652 
653 	if (srbm_soft_reset) {
654 		u32 tmp;
655 
656 		tmp = RREG32(mmSRBM_SOFT_RESET);
657 		tmp |= srbm_soft_reset;
658 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
659 		WREG32(mmSRBM_SOFT_RESET, tmp);
660 		tmp = RREG32(mmSRBM_SOFT_RESET);
661 
662 		udelay(50);
663 
664 		tmp &= ~srbm_soft_reset;
665 		WREG32(mmSRBM_SOFT_RESET, tmp);
666 		tmp = RREG32(mmSRBM_SOFT_RESET);
667 
668 		/* Wait a little for things to settle down */
669 		udelay(50);
670 	}
671 
672 	return 0;
673 }
674 
675 static int vce_v3_0_pre_soft_reset(void *handle)
676 {
677 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
678 
679 	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
680 		return 0;
681 
682 	mdelay(5);
683 
684 	return vce_v3_0_suspend(adev);
685 }
686 
687 
688 static int vce_v3_0_post_soft_reset(void *handle)
689 {
690 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
691 
692 	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
693 		return 0;
694 
695 	mdelay(5);
696 
697 	return vce_v3_0_resume(adev);
698 }
699 
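/* Enable or disable the VCE system interrupt trap based on the requested state. */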
700 static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
701 					struct amdgpu_irq_src *source,
702 					unsigned type,
703 					enum amdgpu_interrupt_state state)
704 {
705 	uint32_t val = 0;
706 
707 	if (state == AMDGPU_IRQ_STATE_ENABLE)
708 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
709 
710 	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
711 	return 0;
712 }
713 
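/* Clear the VCE trap interrupt status and run fence processing on the
 * ring selected by the IV src_data.
 */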
714 static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
715 				      struct amdgpu_irq_src *source,
716 				      struct amdgpu_iv_entry *entry)
717 {
718 	DRM_DEBUG("IH: VCE\n");
719 
720 	WREG32_P(mmVCE_SYS_INT_STATUS,
721 		VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
722 		~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
723 
724 	switch (entry->src_data) {
725 	case 0:
726 	case 1:
727 		amdgpu_fence_process(&adev->vce.ring[entry->src_data]);
728 		break;
729 	default:
730 		DRM_ERROR("Unhandled interrupt: %d %d\n",
731 			  entry->src_id, entry->src_data);
732 		break;
733 	}
734 
735 	return 0;
736 }
737 
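/* Set or clear the BYPASSECLK bit in GCK_DFS_BYPASS_CNTL through the
 * SMC register interface.
 */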
738 static void vce_v3_set_bypass_mode(struct amdgpu_device *adev, bool enable)
739 {
740 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
741 
742 	if (enable)
743 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
744 	else
745 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
746 
747 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
748 }
749 
750 static int vce_v3_0_set_clockgating_state(void *handle,
751 					  enum amd_clockgating_state state)
752 {
753 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
754 	bool enable = (state == AMD_CG_STATE_GATE);
755 	int i;
756 
757 	if (adev->asic_type == CHIP_POLARIS10)
758 		vce_v3_set_bypass_mode(adev, enable);
759 
760 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
761 		return 0;
762 
763 	mutex_lock(&adev->grbm_idx_mutex);
764 	for (i = 0; i < 2; i++) {
765 		/* Program VCE Instance 0 or 1 if not harvested */
766 		if (adev->vce.harvest_config & (1 << i))
767 			continue;
768 
769 		if (i == 0)
770 			WREG32_P(mmGRBM_GFX_INDEX, 0,
771 					~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
772 		else
773 			WREG32_P(mmGRBM_GFX_INDEX,
774 					GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
775 					~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
776 
777 		if (enable) {
778 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
779 			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
780 			data &= ~(0xf | 0xff0);
781 			data |= ((0x0 << 0) | (0x04 << 4));
782 			WREG32(mmVCE_CLOCK_GATING_A, data);
783 
784 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
785 			data = RREG32(mmVCE_UENC_CLOCK_GATING);
786 			data &= ~(0xf | 0xff0);
787 			data |= ((0x0 << 0) | (0x04 << 4));
788 			WREG32(mmVCE_UENC_CLOCK_GATING, data);
789 		}
790 
791 		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
792 	}
793 
794 	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
795 	mutex_unlock(&adev->grbm_idx_mutex);
796 
797 	return 0;
798 }
799 
800 static int vce_v3_0_set_powergating_state(void *handle,
801 					  enum amd_powergating_state state)
802 {
803 	/* This doesn't actually powergate the VCE block.
804 	 * That's done in the dpm code via the SMC.  This
805 	 * just re-inits the block as necessary.  The actual
806 	 * gating still happens in the dpm code.  We should
807 	 * revisit this when there is a cleaner line between
808 	 * the smc and the hw blocks
809 	 */
810 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
811 
812 	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
813 		return 0;
814 
815 	if (state == AMD_PG_STATE_GATE)
816 		/* XXX do we need a vce_v3_0_stop()? */
817 		return 0;
818 	else
819 		return vce_v3_0_start(adev);
820 }
821 
822 const struct amd_ip_funcs vce_v3_0_ip_funcs = {
823 	.name = "vce_v3_0",
824 	.early_init = vce_v3_0_early_init,
825 	.late_init = NULL,
826 	.sw_init = vce_v3_0_sw_init,
827 	.sw_fini = vce_v3_0_sw_fini,
828 	.hw_init = vce_v3_0_hw_init,
829 	.hw_fini = vce_v3_0_hw_fini,
830 	.suspend = vce_v3_0_suspend,
831 	.resume = vce_v3_0_resume,
832 	.is_idle = vce_v3_0_is_idle,
833 	.wait_for_idle = vce_v3_0_wait_for_idle,
834 	.check_soft_reset = vce_v3_0_check_soft_reset,
835 	.pre_soft_reset = vce_v3_0_pre_soft_reset,
836 	.soft_reset = vce_v3_0_soft_reset,
837 	.post_soft_reset = vce_v3_0_post_soft_reset,
838 	.set_clockgating_state = vce_v3_0_set_clockgating_state,
839 	.set_powergating_state = vce_v3_0_set_powergating_state,
840 };
841 
842 static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
843 	.get_rptr = vce_v3_0_ring_get_rptr,
844 	.get_wptr = vce_v3_0_ring_get_wptr,
845 	.set_wptr = vce_v3_0_ring_set_wptr,
846 	.parse_cs = amdgpu_vce_ring_parse_cs,
847 	.emit_ib = amdgpu_vce_ring_emit_ib,
848 	.emit_fence = amdgpu_vce_ring_emit_fence,
849 	.test_ring = amdgpu_vce_ring_test_ring,
850 	.test_ib = amdgpu_vce_ring_test_ib,
851 	.insert_nop = amdgpu_ring_insert_nop,
852 	.pad_ib = amdgpu_ring_generic_pad_ib,
853 	.begin_use = amdgpu_vce_ring_begin_use,
854 	.end_use = amdgpu_vce_ring_end_use,
855 };
856 
857 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
858 {
859 	adev->vce.ring[0].funcs = &vce_v3_0_ring_funcs;
860 	adev->vce.ring[1].funcs = &vce_v3_0_ring_funcs;
861 }
862 
863 static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
864 	.set = vce_v3_0_set_interrupt_state,
865 	.process = vce_v3_0_process_interrupt,
866 };
867 
868 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
869 {
870 	adev->vce.irq.num_types = 1;
871 	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
872 }
873