/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"

#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
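
/*
 * The VCE BO created in vce_v3_0_sw_init() holds one shared firmware image
 * followed by a private stack and data region for each of the two VCE
 * instances, i.e. VCE_V3_0_FW_SIZE + 2 * (VCE_V3_0_STACK_SIZE +
 * VCE_V3_0_DATA_SIZE); vce_v3_0_mc_resume() programs each instance's
 * cache offsets into that layout.
 */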

#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))
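
/*
 * VCE firmware versions are packed as (major << 24) | (minor << 16) |
 * (rev << 8), so FW_52_8_3 above is 0x34080300 and can be compared
 * numerically against adev->vce.fw_version (see vce_v3_0_sw_init()).
 */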

#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
					| GRBM_GFX_INDEX__VCE_ALL_PIPE)
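
/*
 * GET_VCE_INSTANCE() builds the GRBM_GFX_INDEX value that routes register
 * access to one VCE instance on all pipes, e.g. GET_VCE_INSTANCE(1) ==
 * (1 << 4) | 0x07 == 0x17.
 */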

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_RPTR);
	else if (ring == &adev->vce.ring[1])
		return RREG32(mmVCE_RB_RPTR2);
	else
		return RREG32(mmVCE_RB_RPTR3);
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_WPTR);
	else if (ring == &adev->vce.ring[1])
		return RREG32(mmVCE_RB_WPTR2);
	else
		return RREG32(mmVCE_RB_WPTR3);
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
	else if (ring == &adev->vce.ring[1])
		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
	else
		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
}

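/*
 * Set (or clear) VCE_RB_ARB_CTRL.VCE_CGTT_OVERRIDE to override, i.e.
 * temporarily disable, VCE clock gating while the gating registers are
 * being reprogrammed (see vce_v3_0_set_vce_sw_clock_gating()).
 */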
static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set the override to disable clock gating while we reprogram it */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/*
	 * This function enables MGCG, which is controlled by the firmware.
	 * With the clocks in the gated state the core is still accessible,
	 * but the firmware will throttle the clocks on the fly as necessary.
	 */
	if (!gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

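/**
 * vce_v3_0_firmware_loaded - poll for firmware startup
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS until the VCPU reports the firmware as loaded; if it
 * does not, soft-reset the ECPU and retry (up to 10 resets, with each
 * polling round lasting roughly one second).
 * Returns 0 on success or -ETIMEDOUT if the firmware never comes up.
 */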
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
		mdelay(10);
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Set up and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	ring = &adev->vce.ring[2];
	WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
		vce_v3_0_mc_resume(adev, idx);
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

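/**
 * vce_v3_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Halt the VCPU and hold the ECPU in reset on every non-harvested
 * instance, then clear VCE_STATUS.
 */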
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

		/* hold on ECPU */
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

		/* clear VCE STATUS */
		WREG32(mmVCE_STATUS, 0);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

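/**
 * vce_v3_0_get_harvest_config - query harvested VCE instances
 *
 * @adev: amdgpu_device pointer
 *
 * Read the harvest fuses (VCE_HARVEST_FUSE_MACRO on APUs, CC_HARVEST_FUSES
 * otherwise) and translate the two fuse bits into AMDGPU_VCE_HARVEST_VCE0/
 * AMDGPU_VCE_HARVEST_VCE1 flags; single-pipe parts always report VCE1 as
 * harvested.
 */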
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	/* Fiji, Stoney, Polaris10, Polaris11, Polaris12 are single pipe */
	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY) ||
	    (adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		return AMDGPU_VCE_HARVEST_VCE1;

	/* Tonga and CZ are dual or single pipe */
	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		return 0;
	}
}

static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	adev->vce.num_rings = 3;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r, i;

	/* VCE */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	/* firmware 52.8.3 or newer is required for 3-ring support */
	if (adev->vce.fw_version < FW_52_8_3)
		adev->vce.num_rings = 2;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	vce_v3_0_override_vce_clock_gating(adev, true);
	if (!(adev->flags & AMD_IS_APU))
		amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;

		adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	vce_v3_0_stop(adev);
	return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	return vce_v3_0_hw_init(adev);
}

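/**
 * vce_v3_0_mc_resume - program memory controller registers
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Point the VCPU caches of the selected instance at the firmware, stack
 * and data regions inside the shared VCE BO (the 40-bit BARs take the GPU
 * address shifted right by 8) and re-enable the trap interrupt.
 */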
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	}

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/*
	 * According to the VCE team, we should use VCE_STATUS instead of the
	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for the 1st instance, 0x10 for the 2nd).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 through 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

		if (!enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);

			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks.
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (state == AMD_PG_STATE_GATE)
		return vce_v3_0_stop(adev);
	else
		return vce_v3_0_start(adev);
}

static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	mutex_lock(&adev->pm.mutex);

	if (adev->flags & AMD_IS_APU)
		data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
	else
		data = RREG32_SMC(ixCURRENT_PG_STATUS);

	if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
		goto out;
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);

	/* AMD_CG_SUPPORT_VCE_MGCG */
	data = RREG32(mmVCE_CLOCK_GATING_A);
	if (data & (0x04 << 4))
		*flags |= AMD_CG_SUPPORT_VCE_MGCG;

out:
	mutex_unlock(&adev->pm.mutex);
}

static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, pd_addr >> 12);

	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
	.get_clockgating_state = vce_v3_0_get_clockgating_state,
};

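/*
 * The emit_frame_size and emit_ib_size values below count ring dwords:
 * vce_v3_0_emit_pipeline_sync() writes 4 dwords, vce_v3_0_emit_vm_flush()
 * writes 6 and vce_v3_0_ring_emit_ib() writes 5 (one extra dword for the
 * vm_id compared to amdgpu_vce_ring_emit_ib(), which is accounted as 4),
 * while each fence is accounted as 6 dwords.
 */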
static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size =
		4 + /* vce_v3_0_emit_pipeline_sync */
		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		6 + /* vce_v3_0_emit_vm_flush */
		4 + /* vce_v3_0_emit_pipeline_sync */
		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
	.emit_ib = vce_v3_0_ring_emit_ib,
	.emit_vm_flush = vce_v3_0_emit_vm_flush,
	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	if (adev->asic_type >= CHIP_STONEY) {
		for (i = 0; i < adev->vce.num_rings; i++)
			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
		DRM_INFO("VCE enabled in VM mode\n");
	} else {
		for (i = 0; i < adev->vce.num_rings; i++)
			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
		DRM_INFO("VCE enabled in physical mode\n");
	}
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 1,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 4,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};