/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"


#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_RPTR);
	else
		return RREG32(mmVCE_RB_RPTR2);
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_WPTR);
	else
		return RREG32(mmVCE_RB_WPTR2);
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		WREG32(mmVCE_RB_WPTR, ring->wptr);
	else
		WREG32(mmVCE_RB_WPTR2, ring->wptr);
}

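/**
 * vce_v3_0_override_vce_clock_gating - set the VCE clock gating override
 *
 * @adev: amdgpu_device pointer
 * @override: enable/disable the CGTT override
 *
 * Set or clear the VCE_CGTT_OVERRIDE bit in VCE_RB_ARB_CTRL so that the
 * software clock gating settings programmed below take effect.
 */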
static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(mmVCE_RB_ARB_CTRL);
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(mmVCE_RB_ARB_CTRL, data);
}

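/**
 * vce_v3_0_set_vce_sw_clock_gating - program VCE software clock gating
 *
 * @adev: amdgpu_device pointer
 * @gated: true to let the clocks gate off, false to force them on
 *
 * Temporarily sets the clock gating override, then programs the
 * VCE_CLOCK_GATING_B, VCE_UENC_* and DMA clock registers to either force
 * the clocks on (!gated) or allow them to gate off/run dynamically (gated).
 */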
static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 tmp, data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	if (!gated) {
		/* Force CLOCK ON for VCE_CLOCK_GATING_B,
		 * {*_FORCE_ON, *_FORCE_OFF} = {1, 0}
		 * VREG can be FORCE ON or set to Dynamic, but can't be OFF
		 */
		tmp = data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		if (tmp != data)
			WREG32(mmVCE_CLOCK_GATING_B, data);

		/* Force CLOCK ON for VCE_UENC_CLOCK_GATING,
		 * {*_FORCE_ON, *_FORCE_OFF} = {1, 0}
		 */
		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING, data);

		/* set VCE_UENC_CLOCK_GATING_2 */
		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		/* Force CLOCK ON for VCE_UENC_REG_CLOCK_GATING */
		tmp = data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		if (tmp != data)
			WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		/* Force VCE_UENC_DMA_DCLK_CTRL Clock ON */
		tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		if (tmp != data)
			WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		/* Force CLOCK OFF for VCE_CLOCK_GATING_B,
		 * {*, *_FORCE_OFF} = {*, 1}
		 * set VREG to Dynamic, as it can't be OFF
		 */
		tmp = data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		if (tmp != data)
			WREG32(mmVCE_CLOCK_GATING_B, data);
		/* Force CLOCK OFF for VCE_UENC_CLOCK_GATING,
		 * Force CLOCK OFF takes precedence over Force CLOCK ON setting.
		 * {*_FORCE_ON, *_FORCE_OFF} = {*, 1}
		 */
		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		/* Set VCE_UENC_CLOCK_GATING_2 */
		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
		/* Set VCE_UENC_REG_CLOCK_GATING to dynamic */
		tmp = data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0xffc00000;
		if (tmp != data)
			WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
		/* Set VCE_UENC_DMA_DCLK_CTRL CG always in dynamic mode */
		tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		if (tmp != data)
			WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

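/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to report ready
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the FW_LOADED bit, resetting the ECPU and retrying
 * a few times if it does not show up.  Returns 0 on success, -ETIMEDOUT
 * if the firmware never reports as loaded.
 */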
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(mmVCE_SOFT_RESET,
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(mmVCE_SOFT_RESET, 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, ring->wptr);
	WREG32(mmVCE_RB_WPTR, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, ring->wptr);
	WREG32(mmVCE_RB_WPTR2, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		if (idx == 0)
			WREG32_P(mmGRBM_GFX_INDEX, 0,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
		else
			WREG32_P(mmGRBM_GFX_INDEX,
				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);

		vce_v3_0_mc_resume(adev, idx);

		WREG32_P(mmVCE_STATUS, VCE_STATUS__JOB_BUSY_MASK,
		         ~VCE_STATUS__JOB_BUSY_MASK);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
				~VCE_VCPU_CNTL__CLK_EN_MASK);

		WREG32_P(mmVCE_SOFT_RESET, 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);

		/* Set Clock-Gating off */
		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
			vce_v3_0_set_vce_sw_clock_gating(adev, false);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

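/**
 * vce_v3_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the VCE VCPU on each non-harvested instance, hold the ECPU in
 * soft reset and clear the busy flag.
 */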
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		if (idx == 0)
			WREG32_P(mmGRBM_GFX_INDEX, 0,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
		else
			WREG32_P(mmGRBM_GFX_INDEX,
				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_P(mmVCE_VCPU_CNTL, 0,
				~VCE_VCPU_CNTL__CLK_EN_MASK);
		/* hold on ECPU */
		WREG32_P(mmVCE_SOFT_RESET,
			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

		/* clear BUSY flag */
		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);

		/* Set Clock-Gating off */
		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
			vce_v3_0_set_vce_sw_clock_gating(adev, false);
	}

	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

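/**
 * vce_v3_0_get_harvest_config - query VCE instance harvesting
 *
 * @adev: amdgpu_device pointer
 *
 * Work out which VCE instances are disabled, either from the ASIC type
 * (single-pipe parts) or from the harvest fuses, and return the matching
 * AMDGPU_VCE_HARVEST_VCE* flags.
 */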
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	/* Fiji, Stoney, Polaris10, Polaris11 are single pipe */
	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY) ||
	    (adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_POLARIS11))
		return AMDGPU_VCE_HARVEST_VCE1;

	/* Tonga and CZ are dual or single pipe */
	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		return 0;
	}
}

static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r;

	/* VCE */
	r = amdgpu_irq_add_id(adev, 167, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	ring = &adev->vce.ring[0];
	sprintf(ring->name, "vce0");
	r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf,
			     &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE);
	if (r)
		return r;

	ring = &adev->vce.ring[1];
	sprintf(ring->name, "vce1");
	r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf,
			     &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vce_sw_fini(adev);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_start(adev);
	if (r)
		return r;

	adev->vce.ring[0].ready = false;
	adev->vce.ring[1].ready = false;

	for (i = 0; i < 2; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	return vce_v3_0_stop(adev);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	r = vce_v3_0_hw_init(adev);
	if (r)
		return r;

	return r;
}

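/**
 * vce_v3_0_mc_resume - program VCE memory controller registers
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Set up the LMI registers and the VCPU cache offsets/sizes for the
 * firmware, stack and data segments of the given VCE instance.
 */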
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0xf7);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);

	WREG32_P(mmVCE_SYS_INT_EN, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

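/**
 * vce_v3_0_is_idle - check VCE idle status
 *
 * @handle: amdgpu_device pointer
 *
 * Returns true when none of the non-harvested VCE instances report busy
 * in SRBM_STATUS2.
 */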
static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define AMDGPU_VCE_STATUS_BUSY_MASK    0x78

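/**
 * vce_v3_0_check_soft_reset - check whether VCE needs a soft reset
 *
 * @handle: amdgpu_device pointer
 *
 * Meant to inspect VCE_STATUS on both instances and request the
 * SOFT_RESET_VCE0/1 bits when they report busy; currently short-circuited
 * because VCE always reports busy here (see the comment below).
 */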
static int vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;
	u32 tmp;

	/* VCE BUG: it is always busy, so skip its checking now */
	return 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 0x10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bit 3 to bit 6 for the busy status check.
	 */
	tmp = RREG32(mmGRBM_GFX_INDEX);
	tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	WREG32(mmGRBM_GFX_INDEX, tmp);
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	tmp = RREG32(mmGRBM_GFX_INDEX);
	tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	WREG32(mmGRBM_GFX_INDEX, tmp);
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	tmp = RREG32(mmGRBM_GFX_INDEX);
	tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	WREG32(mmGRBM_GFX_INDEX, tmp);

	if (adev->vce.harvest_config & (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		srbm_soft_reset = 0;

	if (srbm_soft_reset) {
		adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = true;
		adev->vce.srbm_soft_reset = srbm_soft_reset;
	} else {
		adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = false;
		adev->vce.srbm_soft_reset = 0;
	}
	return 0;
}

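/**
 * vce_v3_0_soft_reset - soft reset the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * If a hang was detected, toggle the requested SOFT_RESET_VCE bits in
 * SRBM_SOFT_RESET and give the block a moment to settle.
 */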
static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}


static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

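/**
 * vce_v3_0_set_interrupt_state - enable/disable the VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source structure
 * @type: interrupt type
 * @state: AMDGPU_IRQ_STATE_ENABLE or AMDGPU_IRQ_STATE_DISABLE
 *
 * Toggle the VCE system interrupt trap enable bit in VCE_SYS_INT_EN.
 */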
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_P(mmVCE_SYS_INT_STATUS,
		VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
		~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

	switch (entry->src_data) {
	case 0:
	case 1:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data);
		break;
	}

	return 0;
}

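/**
 * vce_v3_set_bypass_mode - set the VCE ECLK bypass mode
 *
 * @adev: amdgpu_device pointer
 * @enable: true to bypass the ECLK DFS, false to use it
 *
 * Set or clear the BYPASSECLK bit in GCK_DFS_BYPASS_CNTL via the SMC
 * register interface.
 */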
static void vce_v3_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

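/**
 * vce_v3_0_set_clockgating_state - set VCE clock gating state
 *
 * @handle: amdgpu_device pointer
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * Select the bypass mode on Polaris10 and, when MGCG is supported,
 * program the clock gating on/off delay registers and the software
 * clock gating for each non-harvested VCE instance.
 */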
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if (adev->asic_type == CHIP_POLARIS10)
		vce_v3_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		if (i == 0)
			WREG32_P(mmGRBM_GFX_INDEX, 0,
					~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
		else
			WREG32_P(mmGRBM_GFX_INDEX,
					GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
					~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v3_0_stop()? */
		return 0;
	else
		return vce_v3_0_start(adev);
}

const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	adev->vce.ring[0].funcs = &vce_v3_0_ring_funcs;
	adev->vce.ring[1].funcs = &vce_v3_0_ring_funcs;
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
};