/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"

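/* Register offsets and fields used below that are not in the generated headers above. */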
#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

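/*
 * VCE firmware versions are packed as (major << 24) | (minor << 16) |
 * (revision << 8); 52.8.3 is the minimum version required for three-ring
 * support (see vce_v3_0_sw_init).
 */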
#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_RPTR);
	else if (ring == &adev->vce.ring[1])
		return RREG32(mmVCE_RB_RPTR2);
	else
		return RREG32(mmVCE_RB_RPTR3);
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_WPTR);
	else if (ring == &adev->vce.ring[1])
		return RREG32(mmVCE_RB_WPTR2);
	else
		return RREG32(mmVCE_RB_WPTR3);
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		WREG32(mmVCE_RB_WPTR, ring->wptr);
	else if (ring == &adev->vce.ring[1])
		WREG32(mmVCE_RB_WPTR2, ring->wptr);
	else
		WREG32(mmVCE_RB_WPTR3, ring->wptr);
}

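/*
 * Set or clear the clock-gating override in VCE_RB_ARB_CTRL.  With the
 * override in place, clock gating is temporarily disabled so the gating
 * registers below can be reprogrammed safely.
 */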
static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/*
	 * This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

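/*
 * Poll VCE_STATUS until the firmware reports itself as loaded.  Each
 * attempt waits up to ~1s (100 * 10ms); if that times out the ECPU is
 * soft-reset and the wait is retried, up to 10 times in total.
 */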
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
		mdelay(10);
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, ring->wptr);
	WREG32(mmVCE_RB_WPTR, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, ring->wptr);
	WREG32(mmVCE_RB_WPTR2, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	ring = &adev->vce.ring[2];
	WREG32(mmVCE_RB_RPTR3, ring->wptr);
	WREG32(mmVCE_RB_WPTR3, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, idx);
		vce_v3_0_mc_resume(adev, idx);
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

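/**
 * vce_v3_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU clock, hold the ECPU in reset and clear the busy flag
 * for each VCE instance that is not harvested.
 */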
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, idx);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

		/* hold the ECPU in reset */
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		/* Set Clock-Gating off */
		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
			vce_v3_0_set_vce_sw_clock_gating(adev, false);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

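/**
 * vce_v3_0_get_harvest_config - check which VCE instances are harvested
 *
 * @adev: amdgpu_device pointer
 *
 * Returns a mask of AMDGPU_VCE_HARVEST_* flags.  Single-pipe parts always
 * report VCE1 as harvested; on dual-pipe parts the harvest fuses are read
 * to determine which instances, if any, are disabled.
 */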
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	/* Fiji, Stoney, Polaris10, Polaris11 are single pipe */
	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY) ||
	    (adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_POLARIS11))
		return AMDGPU_VCE_HARVEST_VCE1;

	/* Tonga and CZ are dual or single pipe */
	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		return 0;
	}
}

static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	adev->vce.num_rings = 3;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r, i;

	/* VCE */
	r = amdgpu_irq_add_id(adev, 167, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	/* 52.8.3 required for 3 ring support */
	if (adev->vce.fw_version < FW_52_8_3)
		adev->vce.num_rings = 2;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf,
				     &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE);
		if (r)
			return r;
	}

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vce_sw_fini(adev);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	return vce_v3_0_stop(adev);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	r = vce_v3_0_hw_init(adev);
	if (r)
		return r;

	return r;
}

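/**
 * vce_v3_0_mc_resume - program the VCE memory controller registers
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Program the LMI and VCPU cache registers so the selected instance sees
 * the firmware image, stack and data segments of the VCE BO.
 */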
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

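	/*
	 * The firmware image is shared between both instances; each instance
	 * then gets its own stack and data segment laid out after it (see the
	 * allocation in vce_v3_0_sw_init).
	 */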
	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of the
	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for the 1st instance, 0x10 for the 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

	switch (entry->src_data) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data);
		break;
	}

	return 0;
}

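/*
 * Toggle the BYPASSECLK bit in GCK_DFS_BYPASS_CNTL.  When set, the VCE
 * engine clock (ECLK) is presumably sourced from the bypass path rather
 * than the DFS output.
 */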
static void vce_v3_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v3_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v3_0_stop()? */
		return 0;
	else
		return vce_v3_0_start(adev);
}

static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, pd_addr >> 12);

	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

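/*
 * vce_v3_0_emit_pipeline_sync - wait for the previously emitted fence
 *
 * Emits a WAIT_GE command so the engine stalls until the value at the
 * ring's fence address reaches the last synced sequence number.
 */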
static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
}

static unsigned vce_v3_0_ring_get_emit_ib_size(struct amdgpu_ring *ring)
{
	return
		5; /* vce_v3_0_ring_emit_ib */
}

static unsigned vce_v3_0_ring_get_dma_frame_size(struct amdgpu_ring *ring)
{
	return
		4 + /* vce_v3_0_emit_pipeline_sync */
		6; /* amdgpu_vce_ring_emit_fence x1 no user fence */
}

static unsigned vce_v3_0_ring_get_dma_frame_size_vm(struct amdgpu_ring *ring)
{
	return
		6 + /* vce_v3_0_emit_vm_flush */
		4 + /* vce_v3_0_emit_pipeline_sync */
		6 + 6; /* amdgpu_vce_ring_emit_fence x2 vm fence */
}

const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.get_emit_ib_size = vce_v3_0_ring_get_emit_ib_size,
	.get_dma_frame_size = vce_v3_0_ring_get_dma_frame_size,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = NULL,
	.emit_ib = vce_v3_0_ring_emit_ib,
	.emit_vm_flush = vce_v3_0_emit_vm_flush,
	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.get_emit_ib_size = vce_v3_0_ring_get_emit_ib_size,
	.get_dma_frame_size = vce_v3_0_ring_get_dma_frame_size_vm,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	if (adev->asic_type >= CHIP_STONEY) {
		for (i = 0; i < adev->vce.num_rings; i++)
			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
		DRM_INFO("VCE enabled in VM mode\n");
	} else {
		for (i = 0; i < adev->vce.num_rings; i++)
			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
		DRM_INFO("VCE enabled in physical mode\n");
	}
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}
919