xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c (revision 6d99a79c)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  * Authors: Christian König <christian.koenig@amd.com>
26  */
27 
28 #include <linux/firmware.h>
29 #include <drm/drmP.h>
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "vid.h"
33 #include "vce/vce_3_0_d.h"
34 #include "vce/vce_3_0_sh_mask.h"
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 #include "gca/gfx_8_0_d.h"
38 #include "smu/smu_7_1_2_d.h"
39 #include "smu/smu_7_1_2_sh_mask.h"
40 #include "gca/gfx_8_0_d.h"
41 #include "gca/gfx_8_0_sh_mask.h"
42 #include "ivsrcid/ivsrcid_vislands30.h"
43 
44 
/*
 * GRBM_GFX_INDEX values used to steer register accesses to one VCE
 * instance (see GET_VCE_INSTANCE() below).
 */
#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07

/* VCPU cache base registers programmed in vce_v3_0_mc_resume() for CHIP_STONEY and newer */
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
/* Value written back to GRBM_GFX_INDEX once per-instance accesses are done */
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

/* VCE_STATUS bit polled by vce_v3_0_firmware_loaded() */
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

/* Sizes of the firmware, stack and data regions set up in vce_v3_0_mc_resume() */
#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

/* Firmware version 52.8.3; vce_v3_0_sw_init() compares adev->vce.fw_version against it */
#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

/* Build the GRBM_GFX_INDEX value selecting VCE instance @i with the ALL_PIPE bits set */
#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
					| GRBM_GFX_INDEX__VCE_ALL_PIPE)

/* Forward declarations for functions used before their definition */
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state);
71 /**
72  * vce_v3_0_ring_get_rptr - get read pointer
73  *
74  * @ring: amdgpu_ring pointer
75  *
76  * Returns the current hardware read pointer
77  */
78 static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
79 {
80 	struct amdgpu_device *adev = ring->adev;
81 	u32 v;
82 
83 	mutex_lock(&adev->grbm_idx_mutex);
84 	if (adev->vce.harvest_config == 0 ||
85 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
86 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
87 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
88 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
89 
90 	if (ring->me == 0)
91 		v = RREG32(mmVCE_RB_RPTR);
92 	else if (ring->me == 1)
93 		v = RREG32(mmVCE_RB_RPTR2);
94 	else
95 		v = RREG32(mmVCE_RB_RPTR3);
96 
97 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
98 	mutex_unlock(&adev->grbm_idx_mutex);
99 
100 	return v;
101 }
102 
103 /**
104  * vce_v3_0_ring_get_wptr - get write pointer
105  *
106  * @ring: amdgpu_ring pointer
107  *
108  * Returns the current hardware write pointer
109  */
110 static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
111 {
112 	struct amdgpu_device *adev = ring->adev;
113 	u32 v;
114 
115 	mutex_lock(&adev->grbm_idx_mutex);
116 	if (adev->vce.harvest_config == 0 ||
117 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
118 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
119 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
120 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
121 
122 	if (ring->me == 0)
123 		v = RREG32(mmVCE_RB_WPTR);
124 	else if (ring->me == 1)
125 		v = RREG32(mmVCE_RB_WPTR2);
126 	else
127 		v = RREG32(mmVCE_RB_WPTR3);
128 
129 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
130 	mutex_unlock(&adev->grbm_idx_mutex);
131 
132 	return v;
133 }
134 
135 /**
136  * vce_v3_0_ring_set_wptr - set write pointer
137  *
138  * @ring: amdgpu_ring pointer
139  *
140  * Commits the write pointer to the hardware
141  */
142 static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
143 {
144 	struct amdgpu_device *adev = ring->adev;
145 
146 	mutex_lock(&adev->grbm_idx_mutex);
147 	if (adev->vce.harvest_config == 0 ||
148 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
149 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
150 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
151 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
152 
153 	if (ring->me == 0)
154 		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
155 	else if (ring->me == 1)
156 		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
157 	else
158 		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
159 
160 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
161 	mutex_unlock(&adev->grbm_idx_mutex);
162 }
163 
164 static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
165 {
166 	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
167 }
168 
/**
 * vce_v3_0_set_vce_sw_clock_gating - program the VCE clock gating registers
 *
 * @adev: amdgpu_device pointer
 * @gated: selects which of the two register settings below is applied
 *
 * Sets the clock gating override, rewrites VCE_CLOCK_GATING_B,
 * VCE_UENC_CLOCK_GATING, VCE_UENC_CLOCK_GATING_2,
 * VCE_UENC_REG_CLOCK_GATING and VCE_UENC_DMA_DCLK_CTRL with
 * read-modify-write accesses, then clears the override again.
 *
 * This function does not touch GRBM_GFX_INDEX itself; its caller,
 * vce_v3_0_set_clockgating_state(), selects the VCE instance first.
 */
static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/*
	 * This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (!gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		/* set the WRDMCLK/RDDMCLK/REGCLK FORCEON bits plus bit 3 */
		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		/* NOTE(review): clears 0x3ff here while the other branch sets 0x37f - confirm intended */
		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		/* clear the same FORCEON bits (and bit 3) the other branch sets */
		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	/* drop the override set at the top of the function */
	vce_v3_0_override_vce_clock_gating(adev, false);
}
235 
236 static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
237 {
238 	int i, j;
239 
240 	for (i = 0; i < 10; ++i) {
241 		for (j = 0; j < 100; ++j) {
242 			uint32_t status = RREG32(mmVCE_STATUS);
243 
244 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
245 				return 0;
246 			mdelay(10);
247 		}
248 
249 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
250 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
251 		mdelay(10);
252 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
253 		mdelay(10);
254 	}
255 
256 	return -ETIMEDOUT;
257 }
258 
259 /**
260  * vce_v3_0_start - start VCE block
261  *
262  * @adev: amdgpu_device pointer
263  *
264  * Setup and start the VCE block
265  */
266 static int vce_v3_0_start(struct amdgpu_device *adev)
267 {
268 	struct amdgpu_ring *ring;
269 	int idx, r;
270 
271 	mutex_lock(&adev->grbm_idx_mutex);
272 	for (idx = 0; idx < 2; ++idx) {
273 		if (adev->vce.harvest_config & (1 << idx))
274 			continue;
275 
276 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
277 
278 		/* Program instance 0 reg space for two instances or instance 0 case
279 		program instance 1 reg space for only instance 1 available case */
280 		if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
281 			ring = &adev->vce.ring[0];
282 			WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
283 			WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
284 			WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
285 			WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
286 			WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
287 
288 			ring = &adev->vce.ring[1];
289 			WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
290 			WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
291 			WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
292 			WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
293 			WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
294 
295 			ring = &adev->vce.ring[2];
296 			WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
297 			WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
298 			WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
299 			WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
300 			WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
301 		}
302 
303 		vce_v3_0_mc_resume(adev, idx);
304 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);
305 
306 		if (adev->asic_type >= CHIP_STONEY)
307 			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
308 		else
309 			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);
310 
311 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
312 		mdelay(100);
313 
314 		r = vce_v3_0_firmware_loaded(adev);
315 
316 		/* clear BUSY flag */
317 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);
318 
319 		if (r) {
320 			DRM_ERROR("VCE not responding, giving up!!!\n");
321 			mutex_unlock(&adev->grbm_idx_mutex);
322 			return r;
323 		}
324 	}
325 
326 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
327 	mutex_unlock(&adev->grbm_idx_mutex);
328 
329 	return 0;
330 }
331 
332 static int vce_v3_0_stop(struct amdgpu_device *adev)
333 {
334 	int idx;
335 
336 	mutex_lock(&adev->grbm_idx_mutex);
337 	for (idx = 0; idx < 2; ++idx) {
338 		if (adev->vce.harvest_config & (1 << idx))
339 			continue;
340 
341 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
342 
343 		if (adev->asic_type >= CHIP_STONEY)
344 			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
345 		else
346 			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);
347 
348 		/* hold on ECPU */
349 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
350 
351 		/* clear VCE STATUS */
352 		WREG32(mmVCE_STATUS, 0);
353 	}
354 
355 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
356 	mutex_unlock(&adev->grbm_idx_mutex);
357 
358 	return 0;
359 }
360 
361 #define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
362 #define VCE_HARVEST_FUSE_MACRO__SHIFT       27
363 #define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000
364 
365 static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
366 {
367 	u32 tmp;
368 
369 	if ((adev->asic_type == CHIP_FIJI) ||
370 	    (adev->asic_type == CHIP_STONEY))
371 		return AMDGPU_VCE_HARVEST_VCE1;
372 
373 	if (adev->flags & AMD_IS_APU)
374 		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
375 		       VCE_HARVEST_FUSE_MACRO__MASK) >>
376 			VCE_HARVEST_FUSE_MACRO__SHIFT;
377 	else
378 		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
379 		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
380 			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;
381 
382 	switch (tmp) {
383 	case 1:
384 		return AMDGPU_VCE_HARVEST_VCE0;
385 	case 2:
386 		return AMDGPU_VCE_HARVEST_VCE1;
387 	case 3:
388 		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
389 	default:
390 		if ((adev->asic_type == CHIP_POLARIS10) ||
391 		    (adev->asic_type == CHIP_POLARIS11) ||
392 		    (adev->asic_type == CHIP_POLARIS12) ||
393 		    (adev->asic_type == CHIP_VEGAM))
394 			return AMDGPU_VCE_HARVEST_VCE1;
395 
396 		return 0;
397 	}
398 }
399 
400 static int vce_v3_0_early_init(void *handle)
401 {
402 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
403 
404 	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
405 
406 	if ((adev->vce.harvest_config &
407 	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
408 	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
409 		return -ENOENT;
410 
411 	adev->vce.num_rings = 3;
412 
413 	vce_v3_0_set_ring_funcs(adev);
414 	vce_v3_0_set_irq_funcs(adev);
415 
416 	return 0;
417 }
418 
419 static int vce_v3_0_sw_init(void *handle)
420 {
421 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
422 	struct amdgpu_ring *ring;
423 	int r, i;
424 
425 	/* VCE */
426 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
427 	if (r)
428 		return r;
429 
430 	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
431 		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
432 	if (r)
433 		return r;
434 
435 	/* 52.8.3 required for 3 ring support */
436 	if (adev->vce.fw_version < FW_52_8_3)
437 		adev->vce.num_rings = 2;
438 
439 	r = amdgpu_vce_resume(adev);
440 	if (r)
441 		return r;
442 
443 	for (i = 0; i < adev->vce.num_rings; i++) {
444 		ring = &adev->vce.ring[i];
445 		sprintf(ring->name, "vce%d", i);
446 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
447 		if (r)
448 			return r;
449 	}
450 
451 	r = amdgpu_vce_entity_init(adev);
452 
453 	return r;
454 }
455 
/**
 * vce_v3_0_sw_fini - software-side teardown
 *
 * @handle: amdgpu_device pointer
 *
 * Calls amdgpu_vce_suspend() and, if that succeeds, amdgpu_vce_sw_fini().
 *
 * Returns 0 on success or the error from whichever call failed.
 */
static int vce_v3_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = amdgpu_vce_suspend(adev);

	return ret ? ret : amdgpu_vce_sw_fini(adev);
}
467 
468 static int vce_v3_0_hw_init(void *handle)
469 {
470 	int r, i;
471 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
472 
473 	vce_v3_0_override_vce_clock_gating(adev, true);
474 
475 	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
476 
477 	for (i = 0; i < adev->vce.num_rings; i++)
478 		adev->vce.ring[i].ready = false;
479 
480 	for (i = 0; i < adev->vce.num_rings; i++) {
481 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
482 		if (r)
483 			return r;
484 		else
485 			adev->vce.ring[i].ready = true;
486 	}
487 
488 	DRM_INFO("VCE initialized successfully.\n");
489 
490 	return 0;
491 }
492 
493 static int vce_v3_0_hw_fini(void *handle)
494 {
495 	int r;
496 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
497 
498 	r = vce_v3_0_wait_for_idle(handle);
499 	if (r)
500 		return r;
501 
502 	vce_v3_0_stop(adev);
503 	return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
504 }
505 
/**
 * vce_v3_0_suspend - suspend the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Runs vce_v3_0_hw_fini() and, if that succeeds, amdgpu_vce_suspend().
 *
 * Returns 0 on success or the error from whichever call failed.
 */
static int vce_v3_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = vce_v3_0_hw_fini(adev);

	return ret ? ret : amdgpu_vce_suspend(adev);
}
517 
/**
 * vce_v3_0_resume - resume the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Runs amdgpu_vce_resume() and, if that succeeds, vce_v3_0_hw_init().
 *
 * Returns 0 on success or the error from whichever call failed.
 */
static int vce_v3_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = amdgpu_vce_resume(adev);

	return ret ? ret : vce_v3_0_hw_init(adev);
}
529 
/**
 * vce_v3_0_mc_resume - program clock gating, LMI and VCPU cache registers
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance being set up; 0 uses the first stack/data region,
 *       any other value uses the second one
 *
 * Called from vce_v3_0_start() after the instance has been selected
 * through GRBM_GFX_INDEX.  Points the VCPU cache at adev->vce.gpu_addr and
 * sets up three cache windows: the firmware, a stack and a data region.
 * Finally enables the trap interrupt in VCE_SYS_INT_EN.
 */
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

	/* CHIP_STONEY and newer have three BAR registers, all set to the same base */
	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	/* window 0: the firmware image, same for both instances */
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		/* windows 1 and 2: stack and data directly behind the firmware */
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		/*
		 * windows 1 and 2: skip the stack and data regions used for
		 * idx 0 and use the ones behind them.
		 * NOTE(review): the mask here is 0xfffffff while the idx 0
		 * branch uses 0x7fffffff - confirm which one is intended.
		 */
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	/* enable the trap interrupt */
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}
580 
581 static bool vce_v3_0_is_idle(void *handle)
582 {
583 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
584 	u32 mask = 0;
585 
586 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
587 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
588 
589 	return !(RREG32(mmSRBM_STATUS2) & mask);
590 }
591 
592 static int vce_v3_0_wait_for_idle(void *handle)
593 {
594 	unsigned i;
595 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
596 
597 	for (i = 0; i < adev->usec_timeout; i++)
598 		if (vce_v3_0_is_idle(handle))
599 			return 0;
600 
601 	return -ETIMEDOUT;
602 }
603 
604 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
605 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
606 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
607 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
608 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
609 
610 static bool vce_v3_0_check_soft_reset(void *handle)
611 {
612 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
613 	u32 srbm_soft_reset = 0;
614 
615 	/* According to VCE team , we should use VCE_STATUS instead
616 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
617 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
618 	 * instance's registers are accessed
619 	 * (0 for 1st instance, 10 for 2nd instance).
620 	 *
621 	 *VCE_STATUS
622 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
623 	 *|----+----+-----------+----+----+----+----------+---------+----|
624 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
625 	 *
626 	 * VCE team suggest use bit 3--bit 6 for busy status check
627 	 */
628 	mutex_lock(&adev->grbm_idx_mutex);
629 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
630 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
631 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
632 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
633 	}
634 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
635 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
636 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
637 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
638 	}
639 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
640 	mutex_unlock(&adev->grbm_idx_mutex);
641 
642 	if (srbm_soft_reset) {
643 		adev->vce.srbm_soft_reset = srbm_soft_reset;
644 		return true;
645 	} else {
646 		adev->vce.srbm_soft_reset = 0;
647 		return false;
648 	}
649 }
650 
651 static int vce_v3_0_soft_reset(void *handle)
652 {
653 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
654 	u32 srbm_soft_reset;
655 
656 	if (!adev->vce.srbm_soft_reset)
657 		return 0;
658 	srbm_soft_reset = adev->vce.srbm_soft_reset;
659 
660 	if (srbm_soft_reset) {
661 		u32 tmp;
662 
663 		tmp = RREG32(mmSRBM_SOFT_RESET);
664 		tmp |= srbm_soft_reset;
665 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
666 		WREG32(mmSRBM_SOFT_RESET, tmp);
667 		tmp = RREG32(mmSRBM_SOFT_RESET);
668 
669 		udelay(50);
670 
671 		tmp &= ~srbm_soft_reset;
672 		WREG32(mmSRBM_SOFT_RESET, tmp);
673 		tmp = RREG32(mmSRBM_SOFT_RESET);
674 
675 		/* Wait a little for things to settle down */
676 		udelay(50);
677 	}
678 
679 	return 0;
680 }
681 
682 static int vce_v3_0_pre_soft_reset(void *handle)
683 {
684 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
685 
686 	if (!adev->vce.srbm_soft_reset)
687 		return 0;
688 
689 	mdelay(5);
690 
691 	return vce_v3_0_suspend(adev);
692 }
693 
694 
695 static int vce_v3_0_post_soft_reset(void *handle)
696 {
697 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
698 
699 	if (!adev->vce.srbm_soft_reset)
700 		return 0;
701 
702 	mdelay(5);
703 
704 	return vce_v3_0_resume(adev);
705 }
706 
707 static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
708 					struct amdgpu_irq_src *source,
709 					unsigned type,
710 					enum amdgpu_interrupt_state state)
711 {
712 	uint32_t val = 0;
713 
714 	if (state == AMDGPU_IRQ_STATE_ENABLE)
715 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
716 
717 	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
718 	return 0;
719 }
720 
721 static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
722 				      struct amdgpu_irq_src *source,
723 				      struct amdgpu_iv_entry *entry)
724 {
725 	DRM_DEBUG("IH: VCE\n");
726 
727 	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);
728 
729 	switch (entry->src_data[0]) {
730 	case 0:
731 	case 1:
732 	case 2:
733 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
734 		break;
735 	default:
736 		DRM_ERROR("Unhandled interrupt: %d %d\n",
737 			  entry->src_id, entry->src_data[0]);
738 		break;
739 	}
740 
741 	return 0;
742 }
743 
744 static int vce_v3_0_set_clockgating_state(void *handle,
745 					  enum amd_clockgating_state state)
746 {
747 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
748 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
749 	int i;
750 
751 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
752 		return 0;
753 
754 	mutex_lock(&adev->grbm_idx_mutex);
755 	for (i = 0; i < 2; i++) {
756 		/* Program VCE Instance 0 or 1 if not harvested */
757 		if (adev->vce.harvest_config & (1 << i))
758 			continue;
759 
760 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));
761 
762 		if (!enable) {
763 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
764 			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
765 			data &= ~(0xf | 0xff0);
766 			data |= ((0x0 << 0) | (0x04 << 4));
767 			WREG32(mmVCE_CLOCK_GATING_A, data);
768 
769 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
770 			data = RREG32(mmVCE_UENC_CLOCK_GATING);
771 			data &= ~(0xf | 0xff0);
772 			data |= ((0x0 << 0) | (0x04 << 4));
773 			WREG32(mmVCE_UENC_CLOCK_GATING, data);
774 		}
775 
776 		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
777 	}
778 
779 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
780 	mutex_unlock(&adev->grbm_idx_mutex);
781 
782 	return 0;
783 }
784 
785 static int vce_v3_0_set_powergating_state(void *handle,
786 					  enum amd_powergating_state state)
787 {
788 	/* This doesn't actually powergate the VCE block.
789 	 * That's done in the dpm code via the SMC.  This
790 	 * just re-inits the block as necessary.  The actual
791 	 * gating still happens in the dpm code.  We should
792 	 * revisit this when there is a cleaner line between
793 	 * the smc and the hw blocks
794 	 */
795 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
796 	int ret = 0;
797 
798 	if (state == AMD_PG_STATE_GATE) {
799 		ret = vce_v3_0_stop(adev);
800 		if (ret)
801 			goto out;
802 	} else {
803 		ret = vce_v3_0_start(adev);
804 		if (ret)
805 			goto out;
806 	}
807 
808 out:
809 	return ret;
810 }
811 
812 static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
813 {
814 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
815 	int data;
816 
817 	mutex_lock(&adev->pm.mutex);
818 
819 	if (adev->flags & AMD_IS_APU)
820 		data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
821 	else
822 		data = RREG32_SMC(ixCURRENT_PG_STATUS);
823 
824 	if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
825 		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
826 		goto out;
827 	}
828 
829 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
830 
831 	/* AMD_CG_SUPPORT_VCE_MGCG */
832 	data = RREG32(mmVCE_CLOCK_GATING_A);
833 	if (data & (0x04 << 4))
834 		*flags |= AMD_CG_SUPPORT_VCE_MGCG;
835 
836 out:
837 	mutex_unlock(&adev->pm.mutex);
838 }
839 
840 static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
841 		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
842 {
843 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
844 	amdgpu_ring_write(ring, vmid);
845 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
846 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
847 	amdgpu_ring_write(ring, ib->length_dw);
848 }
849 
850 static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
851 				   unsigned int vmid, uint64_t pd_addr)
852 {
853 	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
854 	amdgpu_ring_write(ring, vmid);
855 	amdgpu_ring_write(ring, pd_addr >> 12);
856 
857 	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
858 	amdgpu_ring_write(ring, vmid);
859 	amdgpu_ring_write(ring, VCE_CMD_END);
860 }
861 
862 static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
863 {
864 	uint32_t seq = ring->fence_drv.sync_seq;
865 	uint64_t addr = ring->fence_drv.gpu_addr;
866 
867 	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
868 	amdgpu_ring_write(ring, lower_32_bits(addr));
869 	amdgpu_ring_write(ring, upper_32_bits(addr));
870 	amdgpu_ring_write(ring, seq);
871 }
872 
/*
 * IP block callbacks for VCE 3.x.  The same table is referenced by the
 * vce_v3_0, vce_v3_1 and vce_v3_4 IP block versions at the end of this file.
 */
static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
	.get_clockgating_state = vce_v3_0_get_clockgating_state,
};
893 
/*
 * Ring callbacks for "physical mode"; vce_v3_0_set_ring_funcs() installs
 * them when asic_type is below CHIP_STONEY.  IB and fence emission use the
 * common amdgpu_vce helpers.
 *
 * NOTE(review): emit_frame_size accounts for vce_v3_0_emit_pipeline_sync
 * although this table sets no .emit_pipeline_sync - confirm intended.
 */
static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size =
		4 + /* vce_v3_0_emit_pipeline_sync */
		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};
916 
/*
 * Ring callbacks for "VM mode"; vce_v3_0_set_ring_funcs() installs them
 * when asic_type is CHIP_STONEY or newer.  Compared to the physical-mode
 * table this one uses the VM command stream parser and the local IB, VM
 * flush and pipeline sync emitters.  The dword counts below match the
 * number of amdgpu_ring_write() calls in those emitters.
 */
static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		6 + /* vce_v3_0_emit_vm_flush */
		4 + /* vce_v3_0_emit_pipeline_sync */
		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
	.emit_ib = vce_v3_0_ring_emit_ib,
	.emit_vm_flush = vce_v3_0_emit_vm_flush,
	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};
942 
943 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
944 {
945 	int i;
946 
947 	if (adev->asic_type >= CHIP_STONEY) {
948 		for (i = 0; i < adev->vce.num_rings; i++) {
949 			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
950 			adev->vce.ring[i].me = i;
951 		}
952 		DRM_INFO("VCE enabled in VM mode\n");
953 	} else {
954 		for (i = 0; i < adev->vce.num_rings; i++) {
955 			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
956 			adev->vce.ring[i].me = i;
957 		}
958 		DRM_INFO("VCE enabled in physical mode\n");
959 	}
960 }
961 
/* Interrupt source callbacks installed by vce_v3_0_set_irq_funcs() */
static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};
966 
967 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
968 {
969 	adev->vce.irq.num_types = 1;
970 	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
971 };
972 
/* VCE IP block version 3.0.0, backed by vce_v3_0_ip_funcs */
const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};
981 
/* VCE IP block version 3.1.0; shares vce_v3_0_ip_funcs with version 3.0 */
const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 1,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};
990 
/* VCE IP block version 3.4.0; shares vce_v3_0_ip_funcs with version 3.0 */
const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 4,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};
998 };
999