xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c (revision 293d5b43)
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"

#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_RPTR);
	else
		return RREG32(mmVCE_RB_RPTR2);
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_WPTR);
	else
		return RREG32(mmVCE_RB_WPTR2);
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		WREG32(mmVCE_RB_WPTR, ring->wptr);
	else
		WREG32(mmVCE_RB_WPTR2, ring->wptr);
}

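/**
 * vce_v3_0_override_vce_clock_gating - override the VCE clock gating control
 *
 * @adev: amdgpu_device pointer
 * @override: enable or disable the clock gating override
 *
 * Sets or clears the VCE_CGTT_OVERRIDE bit in VCE_RB_ARB_CTRL, only
 * touching the register when the value actually changes.
 */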
static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(mmVCE_RB_ARB_CTRL);
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(mmVCE_RB_ARB_CTRL, data);
}

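/**
 * vce_v3_0_set_vce_sw_clock_gating - program VCE software clock gating
 *
 * @adev: amdgpu_device pointer
 * @gated: true to force the clocks off (gated), false to force them on
 *
 * Programs the VCE_CLOCK_GATING_B, VCE_UENC_* and DMA clock gating
 * registers for either the gated or the ungated state, with the clock
 * gating override asserted while the registers are updated.
 */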
static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 tmp, data;
	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	if (!gated) {
		/* Force CLOCK ON for VCE_CLOCK_GATING_B,
		 * {*_FORCE_ON, *_FORCE_OFF} = {1, 0}
		 * VREG can be FORCE ON or set to Dynamic, but can't be OFF
		 */
		tmp = data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		if (tmp != data)
			WREG32(mmVCE_CLOCK_GATING_B, data);

		/* Force CLOCK ON for VCE_UENC_CLOCK_GATING,
		 * {*_FORCE_ON, *_FORCE_OFF} = {1, 0}
		 */
		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING, data);

		/* set VCE_UENC_CLOCK_GATING_2 */
		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		/* Force CLOCK ON for VCE_UENC_REG_CLOCK_GATING */
		tmp = data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		if (tmp != data)
			WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		/* Force VCE_UENC_DMA_DCLK_CTRL Clock ON */
		tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
				VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
				VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
				0x8;
		if (tmp != data)
			WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		/* Force CLOCK OFF for VCE_CLOCK_GATING_B,
		 * {*, *_FORCE_OFF} = {*, 1}
		 * set VREG to Dynamic, as it can't be OFF
		 */
		tmp = data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		if (tmp != data)
			WREG32(mmVCE_CLOCK_GATING_B, data);
		/* Force CLOCK OFF for VCE_UENC_CLOCK_GATING,
		 * Force CLOCK OFF takes precedence over Force CLOCK ON setting.
		 * {*_FORCE_ON, *_FORCE_OFF} = {*, 1}
		 */
		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		/* Set VCE_UENC_CLOCK_GATING_2 */
		tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		if (tmp != data)
			WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
		/* Set VCE_UENC_REG_CLOCK_GATING to dynamic */
		tmp = data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0xffc00000;
		if (tmp != data)
			WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
		/* Set VCE_UENC_DMA_DCLK_CTRL CG always in dynamic mode */
		tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
				VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
				VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
				0x8);
		if (tmp != data)
			WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

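/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the firmware-loaded bit, resetting the ECPU and
 * retrying if it does not show up in time.
 * Returns 0 on success, -ETIMEDOUT if the firmware never reports in.
 */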
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(mmVCE_SOFT_RESET,
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(mmVCE_SOFT_RESET, 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, ring->wptr);
	WREG32(mmVCE_RB_WPTR, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, ring->wptr);
	WREG32(mmVCE_RB_WPTR2, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		if (idx == 0)
			WREG32_P(mmGRBM_GFX_INDEX, 0,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
		else
			WREG32_P(mmGRBM_GFX_INDEX,
				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);

		vce_v3_0_mc_resume(adev, idx);

		WREG32_P(mmVCE_STATUS, VCE_STATUS__JOB_BUSY_MASK,
		         ~VCE_STATUS__JOB_BUSY_MASK);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
				~VCE_VCPU_CNTL__CLK_EN_MASK);

		WREG32_P(mmVCE_SOFT_RESET, 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);

		/* Set Clock-Gating off */
		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
			vce_v3_0_set_vce_sw_clock_gating(adev, false);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

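/**
 * vce_v3_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Stops the VCPU on every non-harvested instance and holds the ECPU in
 * soft reset.
 */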
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		if (idx == 0)
			WREG32_P(mmGRBM_GFX_INDEX, 0,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
		else
			WREG32_P(mmGRBM_GFX_INDEX,
				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_P(mmVCE_VCPU_CNTL, 0,
				~VCE_VCPU_CNTL__CLK_EN_MASK);
		/* hold on ECPU */
		WREG32_P(mmVCE_SOFT_RESET,
			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

		/* clear BUSY flag */
		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);

		/* Set Clock-Gating off */
		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
			vce_v3_0_set_vce_sw_clock_gating(adev, false);
	}

	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

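/**
 * vce_v3_0_get_harvest_config - query which VCE instances are harvested
 *
 * @adev: amdgpu_device pointer
 *
 * Returns a mask of AMDGPU_VCE_HARVEST_VCE0/VCE1 bits describing the
 * harvested instances, either hardcoded per ASIC or read from the
 * harvest fuses.
 */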
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	/* Fiji, Stoney, Polaris10, Polaris11 are single pipe */
	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY) ||
	    (adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_POLARIS11))
		return AMDGPU_VCE_HARVEST_VCE1;

	/* Tonga and CZ are dual or single pipe */
	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		return 0;
	}
}

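/**
 * vce_v3_0_early_init - initial settings
 *
 * @handle: amdgpu_device pointer
 *
 * Determines the harvest configuration and installs the ring and irq
 * callbacks.  Returns -ENOENT if both instances are harvested.
 */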
static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

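/**
 * vce_v3_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the VCE interrupt source, sets up the VCE firmware and BO,
 * and initializes the two VCE rings.
 */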
static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r;

	/* VCE */
	r = amdgpu_irq_add_id(adev, 167, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	ring = &adev->vce.ring[0];
	sprintf(ring->name, "vce0");
	r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf,
			     &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE);
	if (r)
		return r;

	ring = &adev->vce.ring[1];
	sprintf(ring->name, "vce1");
	r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf,
			     &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vce_sw_fini(adev);
	if (r)
		return r;

	return r;
}

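/**
 * vce_v3_0_hw_init - hardware init
 *
 * @handle: amdgpu_device pointer
 *
 * Starts the VCE block and tests both rings, marking them ready on
 * success.
 */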
static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_start(adev);
	if (r)
		return r;

	adev->vce.ring[0].ready = false;
	adev->vce.ring[1].ready = false;

	for (i = 0; i < 2; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	return vce_v3_0_stop(adev);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	r = vce_v3_0_hw_init(adev);
	if (r)
		return r;

	return r;
}

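/**
 * vce_v3_0_mc_resume - program the memory controller related registers
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Programs the LMI and VCPU cache registers so the selected instance
 * sees the firmware, stack and data regions of the VCE BO.
 */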
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0xf7);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);

	WREG32_P(mmVCE_SYS_INT_EN, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

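/**
 * vce_v3_0_is_idle - check VCE idle status
 *
 * @handle: amdgpu_device pointer
 *
 * Returns true if none of the non-harvested VCE instances report busy
 * in SRBM_STATUS2.
 */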
static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

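/**
 * vce_v3_0_wait_for_idle - wait for VCE to become idle
 *
 * @handle: amdgpu_device pointer
 *
 * Polls vce_v3_0_is_idle() until it reports idle or the timeout expires.
 * Returns 0 on success, -ETIMEDOUT otherwise.
 */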
static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

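/**
 * vce_v3_0_soft_reset - soft reset VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Asserts the SRBM soft reset for the non-harvested VCE instances and
 * then restarts the block.
 */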
static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_SOFT_RESET__SOFT_RESET_VCE0_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_SOFT_RESET__SOFT_RESET_VCE1_MASK;

	WREG32_P(mmSRBM_SOFT_RESET, mask,
		 ~(SRBM_SOFT_RESET__SOFT_RESET_VCE0_MASK |
		   SRBM_SOFT_RESET__SOFT_RESET_VCE1_MASK));
	mdelay(5);

	return vce_v3_0_start(adev);
}

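/**
 * vce_v3_0_set_interrupt_state - enable/disable the VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 *
 * Enables or disables the VCE system interrupt trap.
 */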
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

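/**
 * vce_v3_0_process_interrupt - handle a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledges the interrupt and signals fence processing on the ring
 * identified by the source data.
 */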
static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_P(mmVCE_SYS_INT_STATUS,
		VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
		~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

	switch (entry->src_data) {
	case 0:
	case 1:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data);
		break;
	}

	return 0;
}

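/**
 * vce_v3_set_bypass_mode - enable/disable the ECLK bypass
 *
 * @adev: amdgpu_device pointer
 * @enable: enable or disable the bypass
 *
 * Toggles the BYPASSECLK bit in GCK_DFS_BYPASS_CNTL via the SMC
 * register interface.
 */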
static void vce_v3_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

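/**
 * vce_v3_0_set_clockgating_state - set VCE clock gating state
 *
 * @handle: amdgpu_device pointer
 * @state: clockgating state (gate or ungate)
 *
 * Programs the clock gating delay registers and the software clock
 * gating for each non-harvested VCE instance when MGCG is supported.
 */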
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if (adev->asic_type == CHIP_POLARIS10)
		vce_v3_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		if (i == 0)
			WREG32_P(mmGRBM_GFX_INDEX, 0,
					~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
		else
			WREG32_P(mmGRBM_GFX_INDEX,
					GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
					~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v3_0_stop()? */
		return 0;
	else
		return vce_v3_0_start(adev);
}

const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.soft_reset = vce_v3_0_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	adev->vce.ring[0].funcs = &vce_v3_0_ring_funcs;
	adev->vce.ring[1].funcs = &vce_v3_0_ring_funcs;
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}