xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c (revision 4da722ca)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  * Authors: Christian König <christian.koenig@amd.com>
26  */
27 
28 #include <linux/firmware.h>
29 #include <drm/drmP.h>
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "vid.h"
33 #include "vce/vce_3_0_d.h"
34 #include "vce/vce_3_0_sh_mask.h"
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 #include "gca/gfx_8_0_d.h"
38 #include "smu/smu_7_1_2_d.h"
39 #include "smu/smu_7_1_2_sh_mask.h"
40 #include "gca/gfx_8_0_d.h"
41 #include "gca/gfx_8_0_sh_mask.h"
42 
43 
/* VCE_INSTANCE field of GRBM_GFX_INDEX and the pipe bits OR-ed in by GET_VCE_INSTANCE() */
#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07

/* Register offsets defined locally; the 40BIT_BAR0..2 registers are written in
 * vce_v3_0_mc_resume() when asic_type >= CHIP_STONEY.
 */
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
/* Value written back to GRBM_GFX_INDEX after every per-instance access in this file */
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

/* VCE_STATUS bit polled by vce_v3_0_firmware_loaded() */
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

/* Sizes of the three VCPU cache windows programmed in vce_v3_0_mc_resume() */
#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

/* Firmware version threshold compared against adev->vce.fw_version in sw_init */
#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

/* GRBM_GFX_INDEX value selecting VCE instance @i together with the ALL_PIPE bits */
#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
					| GRBM_GFX_INDEX__VCE_ALL_PIPE)
63 
/* Forward declarations of functions that are called before their definitions below */
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state);
70 /**
71  * vce_v3_0_ring_get_rptr - get read pointer
72  *
73  * @ring: amdgpu_ring pointer
74  *
75  * Returns the current hardware read pointer
76  */
77 static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
78 {
79 	struct amdgpu_device *adev = ring->adev;
80 	u32 v;
81 
82 	mutex_lock(&adev->grbm_idx_mutex);
83 	if (adev->vce.harvest_config == 0 ||
84 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
85 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
86 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
87 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
88 
89 	if (ring == &adev->vce.ring[0])
90 		v = RREG32(mmVCE_RB_RPTR);
91 	else if (ring == &adev->vce.ring[1])
92 		v = RREG32(mmVCE_RB_RPTR2);
93 	else
94 		v = RREG32(mmVCE_RB_RPTR3);
95 
96 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
97 	mutex_unlock(&adev->grbm_idx_mutex);
98 
99 	return v;
100 }
101 
102 /**
103  * vce_v3_0_ring_get_wptr - get write pointer
104  *
105  * @ring: amdgpu_ring pointer
106  *
107  * Returns the current hardware write pointer
108  */
109 static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
110 {
111 	struct amdgpu_device *adev = ring->adev;
112 	u32 v;
113 
114 	mutex_lock(&adev->grbm_idx_mutex);
115 	if (adev->vce.harvest_config == 0 ||
116 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
117 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
118 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
119 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
120 
121 	if (ring == &adev->vce.ring[0])
122 		v = RREG32(mmVCE_RB_WPTR);
123 	else if (ring == &adev->vce.ring[1])
124 		v = RREG32(mmVCE_RB_WPTR2);
125 	else
126 		v = RREG32(mmVCE_RB_WPTR3);
127 
128 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
129 	mutex_unlock(&adev->grbm_idx_mutex);
130 
131 	return v;
132 }
133 
134 /**
135  * vce_v3_0_ring_set_wptr - set write pointer
136  *
137  * @ring: amdgpu_ring pointer
138  *
139  * Commits the write pointer to the hardware
140  */
141 static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
142 {
143 	struct amdgpu_device *adev = ring->adev;
144 
145 	mutex_lock(&adev->grbm_idx_mutex);
146 	if (adev->vce.harvest_config == 0 ||
147 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
148 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
149 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
150 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
151 
152 	if (ring == &adev->vce.ring[0])
153 		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
154 	else if (ring == &adev->vce.ring[1])
155 		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
156 	else
157 		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
158 
159 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
160 	mutex_unlock(&adev->grbm_idx_mutex);
161 }
162 
163 static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
164 {
165 	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
166 }
167 
/*
 * vce_v3_0_set_vce_sw_clock_gating - program the VCE clock gating registers
 *
 * @adev: amdgpu_device pointer
 * @gated: selects which of the two register value sets below is applied
 *
 * The CGTT override is set for the duration of the register updates and
 * cleared again at the end.  The registers touched are VCE_CLOCK_GATING_B,
 * VCE_UENC_CLOCK_GATING, VCE_UENC_CLOCK_GATING_2, VCE_UENC_REG_CLOCK_GATING
 * and VCE_UENC_DMA_DCLK_CTRL, each via read-modify-write.
 *
 * This function does not touch GRBM_GFX_INDEX itself; the caller in this
 * file (vce_v3_0_set_clockgating_state) selects the instance beforehand.
 */
static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (!gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		/* set the three FORCEON bits plus bit 3 */
		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		/* clear the same bits the other branch sets */
		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	/* drop the override again now that the registers are programmed */
	vce_v3_0_override_vce_clock_gating(adev, false);
}
234 
235 static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
236 {
237 	int i, j;
238 
239 	for (i = 0; i < 10; ++i) {
240 		for (j = 0; j < 100; ++j) {
241 			uint32_t status = RREG32(mmVCE_STATUS);
242 
243 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
244 				return 0;
245 			mdelay(10);
246 		}
247 
248 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
249 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
250 		mdelay(10);
251 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
252 		mdelay(10);
253 	}
254 
255 	return -ETIMEDOUT;
256 }
257 
258 /**
259  * vce_v3_0_start - start VCE block
260  *
261  * @adev: amdgpu_device pointer
262  *
263  * Setup and start the VCE block
264  */
265 static int vce_v3_0_start(struct amdgpu_device *adev)
266 {
267 	struct amdgpu_ring *ring;
268 	int idx, r;
269 
270 	mutex_lock(&adev->grbm_idx_mutex);
271 	for (idx = 0; idx < 2; ++idx) {
272 		if (adev->vce.harvest_config & (1 << idx))
273 			continue;
274 
275 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
276 
277 		/* Program instance 0 reg space for two instances or instance 0 case
278 		program instance 1 reg space for only instance 1 available case */
279 		if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
280 			ring = &adev->vce.ring[0];
281 			WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
282 			WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
283 			WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
284 			WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
285 			WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
286 
287 			ring = &adev->vce.ring[1];
288 			WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
289 			WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
290 			WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
291 			WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
292 			WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
293 
294 			ring = &adev->vce.ring[2];
295 			WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
296 			WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
297 			WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
298 			WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
299 			WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
300 		}
301 
302 		vce_v3_0_mc_resume(adev, idx);
303 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);
304 
305 		if (adev->asic_type >= CHIP_STONEY)
306 			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
307 		else
308 			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);
309 
310 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
311 		mdelay(100);
312 
313 		r = vce_v3_0_firmware_loaded(adev);
314 
315 		/* clear BUSY flag */
316 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);
317 
318 		if (r) {
319 			DRM_ERROR("VCE not responding, giving up!!!\n");
320 			mutex_unlock(&adev->grbm_idx_mutex);
321 			return r;
322 		}
323 	}
324 
325 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
326 	mutex_unlock(&adev->grbm_idx_mutex);
327 
328 	return 0;
329 }
330 
331 static int vce_v3_0_stop(struct amdgpu_device *adev)
332 {
333 	int idx;
334 
335 	mutex_lock(&adev->grbm_idx_mutex);
336 	for (idx = 0; idx < 2; ++idx) {
337 		if (adev->vce.harvest_config & (1 << idx))
338 			continue;
339 
340 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
341 
342 		if (adev->asic_type >= CHIP_STONEY)
343 			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
344 		else
345 			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);
346 
347 		/* hold on ECPU */
348 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
349 
350 		/* clear VCE STATUS */
351 		WREG32(mmVCE_STATUS, 0);
352 	}
353 
354 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
355 	mutex_unlock(&adev->grbm_idx_mutex);
356 
357 	return 0;
358 }
359 
360 #define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
361 #define VCE_HARVEST_FUSE_MACRO__SHIFT       27
362 #define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000
363 
364 static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
365 {
366 	u32 tmp;
367 
368 	/* Fiji, Stoney, Polaris10, Polaris11, Polaris12 are single pipe */
369 	if ((adev->asic_type == CHIP_FIJI) ||
370 	    (adev->asic_type == CHIP_STONEY) ||
371 	    (adev->asic_type == CHIP_POLARIS10) ||
372 	    (adev->asic_type == CHIP_POLARIS11) ||
373 	    (adev->asic_type == CHIP_POLARIS12))
374 		return AMDGPU_VCE_HARVEST_VCE1;
375 
376 	/* Tonga and CZ are dual or single pipe */
377 	if (adev->flags & AMD_IS_APU)
378 		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
379 		       VCE_HARVEST_FUSE_MACRO__MASK) >>
380 			VCE_HARVEST_FUSE_MACRO__SHIFT;
381 	else
382 		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
383 		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
384 			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;
385 
386 	switch (tmp) {
387 	case 1:
388 		return AMDGPU_VCE_HARVEST_VCE0;
389 	case 2:
390 		return AMDGPU_VCE_HARVEST_VCE1;
391 	case 3:
392 		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
393 	default:
394 		return 0;
395 	}
396 }
397 
398 static int vce_v3_0_early_init(void *handle)
399 {
400 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
401 
402 	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
403 
404 	if ((adev->vce.harvest_config &
405 	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
406 	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
407 		return -ENOENT;
408 
409 	adev->vce.num_rings = 3;
410 
411 	vce_v3_0_set_ring_funcs(adev);
412 	vce_v3_0_set_irq_funcs(adev);
413 
414 	return 0;
415 }
416 
417 static int vce_v3_0_sw_init(void *handle)
418 {
419 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
420 	struct amdgpu_ring *ring;
421 	int r, i;
422 
423 	/* VCE */
424 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq);
425 	if (r)
426 		return r;
427 
428 	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
429 		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
430 	if (r)
431 		return r;
432 
433 	/* 52.8.3 required for 3 ring support */
434 	if (adev->vce.fw_version < FW_52_8_3)
435 		adev->vce.num_rings = 2;
436 
437 	r = amdgpu_vce_resume(adev);
438 	if (r)
439 		return r;
440 
441 	for (i = 0; i < adev->vce.num_rings; i++) {
442 		ring = &adev->vce.ring[i];
443 		sprintf(ring->name, "vce%d", i);
444 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
445 		if (r)
446 			return r;
447 	}
448 
449 	return r;
450 }
451 
/*
 * vce_v3_0_sw_fini - software side teardown
 *
 * Suspends the common VCE state and, if that succeeds, runs the common
 * VCE software fini.  Returns the first non-zero result, or the result
 * of amdgpu_vce_sw_fini().
 */
static int vce_v3_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = amdgpu_vce_suspend(adev);

	if (!ret)
		ret = amdgpu_vce_sw_fini(adev);

	return ret;
}
463 
464 static int vce_v3_0_hw_init(void *handle)
465 {
466 	int r, i;
467 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
468 
469 	vce_v3_0_override_vce_clock_gating(adev, true);
470 	if (!(adev->flags & AMD_IS_APU))
471 		amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
472 
473 	for (i = 0; i < adev->vce.num_rings; i++)
474 		adev->vce.ring[i].ready = false;
475 
476 	for (i = 0; i < adev->vce.num_rings; i++) {
477 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
478 		if (r)
479 			return r;
480 		else
481 			adev->vce.ring[i].ready = true;
482 	}
483 
484 	DRM_INFO("VCE initialized successfully.\n");
485 
486 	return 0;
487 }
488 
489 static int vce_v3_0_hw_fini(void *handle)
490 {
491 	int r;
492 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
493 
494 	r = vce_v3_0_wait_for_idle(handle);
495 	if (r)
496 		return r;
497 
498 	vce_v3_0_stop(adev);
499 	return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
500 }
501 
/*
 * vce_v3_0_suspend - hw_fini followed by the common VCE suspend
 *
 * Returns the hw_fini error if it fails, otherwise the result of
 * amdgpu_vce_suspend().
 */
static int vce_v3_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = vce_v3_0_hw_fini(adev);

	if (!ret)
		ret = amdgpu_vce_suspend(adev);

	return ret;
}
513 
/*
 * vce_v3_0_resume - common VCE resume followed by hw_init
 *
 * Returns the amdgpu_vce_resume() error if it fails, otherwise the
 * result of vce_v3_0_hw_init().
 */
static int vce_v3_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = amdgpu_vce_resume(adev);

	if (!ret)
		ret = vce_v3_0_hw_init(adev);

	return ret;
}
525 
/*
 * vce_v3_0_mc_resume - program the memory interface of one VCE instance
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index; selects which stack/data region is used
 *
 * Writes initial clock gating and LMI register values, points the VCPU
 * cache BAR(s) at adev->vce.gpu_addr and sets up the three VCPU cache
 * windows (firmware, stack, data).  The caller in this file
 * (vce_v3_0_start) selects the instance through GRBM_GFX_INDEX first.
 */
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

	/* CHIP_STONEY and later have three BAR registers, all given the same base */
	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	/* window 0: firmware image, identical for both instances */
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		/* windows 1/2: stack and data directly behind the firmware */
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		/* skip one stack+data region so this instance gets its own copy */
		/* NOTE(review): the offset mask here is 0xfffffff while the
		 * idx == 0 branch uses 0x7fffffff - confirm this is intended.
		 */
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	/* enable the trap interrupt */
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}
576 
577 static bool vce_v3_0_is_idle(void *handle)
578 {
579 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
580 	u32 mask = 0;
581 
582 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
583 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
584 
585 	return !(RREG32(mmSRBM_STATUS2) & mask);
586 }
587 
588 static int vce_v3_0_wait_for_idle(void *handle)
589 {
590 	unsigned i;
591 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
592 
593 	for (i = 0; i < adev->usec_timeout; i++)
594 		if (vce_v3_0_is_idle(handle))
595 			return 0;
596 
597 	return -ETIMEDOUT;
598 }
599 
600 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
601 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
602 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
603 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
604 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
605 
606 static bool vce_v3_0_check_soft_reset(void *handle)
607 {
608 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
609 	u32 srbm_soft_reset = 0;
610 
611 	/* According to VCE team , we should use VCE_STATUS instead
612 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
613 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
614 	 * instance's registers are accessed
615 	 * (0 for 1st instance, 10 for 2nd instance).
616 	 *
617 	 *VCE_STATUS
618 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
619 	 *|----+----+-----------+----+----+----+----------+---------+----|
620 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
621 	 *
622 	 * VCE team suggest use bit 3--bit 6 for busy status check
623 	 */
624 	mutex_lock(&adev->grbm_idx_mutex);
625 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
626 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
627 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
628 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
629 	}
630 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
631 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
632 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
633 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
634 	}
635 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
636 	mutex_unlock(&adev->grbm_idx_mutex);
637 
638 	if (srbm_soft_reset) {
639 		adev->vce.srbm_soft_reset = srbm_soft_reset;
640 		return true;
641 	} else {
642 		adev->vce.srbm_soft_reset = 0;
643 		return false;
644 	}
645 }
646 
647 static int vce_v3_0_soft_reset(void *handle)
648 {
649 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
650 	u32 srbm_soft_reset;
651 
652 	if (!adev->vce.srbm_soft_reset)
653 		return 0;
654 	srbm_soft_reset = adev->vce.srbm_soft_reset;
655 
656 	if (srbm_soft_reset) {
657 		u32 tmp;
658 
659 		tmp = RREG32(mmSRBM_SOFT_RESET);
660 		tmp |= srbm_soft_reset;
661 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
662 		WREG32(mmSRBM_SOFT_RESET, tmp);
663 		tmp = RREG32(mmSRBM_SOFT_RESET);
664 
665 		udelay(50);
666 
667 		tmp &= ~srbm_soft_reset;
668 		WREG32(mmSRBM_SOFT_RESET, tmp);
669 		tmp = RREG32(mmSRBM_SOFT_RESET);
670 
671 		/* Wait a little for things to settle down */
672 		udelay(50);
673 	}
674 
675 	return 0;
676 }
677 
678 static int vce_v3_0_pre_soft_reset(void *handle)
679 {
680 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
681 
682 	if (!adev->vce.srbm_soft_reset)
683 		return 0;
684 
685 	mdelay(5);
686 
687 	return vce_v3_0_suspend(adev);
688 }
689 
690 
691 static int vce_v3_0_post_soft_reset(void *handle)
692 {
693 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
694 
695 	if (!adev->vce.srbm_soft_reset)
696 		return 0;
697 
698 	mdelay(5);
699 
700 	return vce_v3_0_resume(adev);
701 }
702 
703 static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
704 					struct amdgpu_irq_src *source,
705 					unsigned type,
706 					enum amdgpu_interrupt_state state)
707 {
708 	uint32_t val = 0;
709 
710 	if (state == AMDGPU_IRQ_STATE_ENABLE)
711 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
712 
713 	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
714 	return 0;
715 }
716 
717 static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
718 				      struct amdgpu_irq_src *source,
719 				      struct amdgpu_iv_entry *entry)
720 {
721 	DRM_DEBUG("IH: VCE\n");
722 
723 	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);
724 
725 	switch (entry->src_data[0]) {
726 	case 0:
727 	case 1:
728 	case 2:
729 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
730 		break;
731 	default:
732 		DRM_ERROR("Unhandled interrupt: %d %d\n",
733 			  entry->src_id, entry->src_data[0]);
734 		break;
735 	}
736 
737 	return 0;
738 }
739 
740 static int vce_v3_0_set_clockgating_state(void *handle,
741 					  enum amd_clockgating_state state)
742 {
743 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
744 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
745 	int i;
746 
747 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
748 		return 0;
749 
750 	mutex_lock(&adev->grbm_idx_mutex);
751 	for (i = 0; i < 2; i++) {
752 		/* Program VCE Instance 0 or 1 if not harvested */
753 		if (adev->vce.harvest_config & (1 << i))
754 			continue;
755 
756 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));
757 
758 		if (!enable) {
759 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
760 			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
761 			data &= ~(0xf | 0xff0);
762 			data |= ((0x0 << 0) | (0x04 << 4));
763 			WREG32(mmVCE_CLOCK_GATING_A, data);
764 
765 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
766 			data = RREG32(mmVCE_UENC_CLOCK_GATING);
767 			data &= ~(0xf | 0xff0);
768 			data |= ((0x0 << 0) | (0x04 << 4));
769 			WREG32(mmVCE_UENC_CLOCK_GATING, data);
770 		}
771 
772 		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
773 	}
774 
775 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
776 	mutex_unlock(&adev->grbm_idx_mutex);
777 
778 	return 0;
779 }
780 
781 static int vce_v3_0_set_powergating_state(void *handle,
782 					  enum amd_powergating_state state)
783 {
784 	/* This doesn't actually powergate the VCE block.
785 	 * That's done in the dpm code via the SMC.  This
786 	 * just re-inits the block as necessary.  The actual
787 	 * gating still happens in the dpm code.  We should
788 	 * revisit this when there is a cleaner line between
789 	 * the smc and the hw blocks
790 	 */
791 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
792 	int ret = 0;
793 
794 	if (state == AMD_PG_STATE_GATE) {
795 		ret = vce_v3_0_stop(adev);
796 		if (ret)
797 			goto out;
798 	} else {
799 		ret = vce_v3_0_start(adev);
800 		if (ret)
801 			goto out;
802 	}
803 
804 out:
805 	return ret;
806 }
807 
808 static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
809 {
810 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
811 	int data;
812 
813 	mutex_lock(&adev->pm.mutex);
814 
815 	if (adev->flags & AMD_IS_APU)
816 		data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
817 	else
818 		data = RREG32_SMC(ixCURRENT_PG_STATUS);
819 
820 	if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
821 		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
822 		goto out;
823 	}
824 
825 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
826 
827 	/* AMD_CG_SUPPORT_VCE_MGCG */
828 	data = RREG32(mmVCE_CLOCK_GATING_A);
829 	if (data & (0x04 << 4))
830 		*flags |= AMD_CG_SUPPORT_VCE_MGCG;
831 
832 out:
833 	mutex_unlock(&adev->pm.mutex);
834 }
835 
836 static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
837 		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
838 {
839 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
840 	amdgpu_ring_write(ring, vm_id);
841 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
842 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
843 	amdgpu_ring_write(ring, ib->length_dw);
844 }
845 
846 static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
847 			 unsigned int vm_id, uint64_t pd_addr)
848 {
849 	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
850 	amdgpu_ring_write(ring, vm_id);
851 	amdgpu_ring_write(ring, pd_addr >> 12);
852 
853 	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
854 	amdgpu_ring_write(ring, vm_id);
855 	amdgpu_ring_write(ring, VCE_CMD_END);
856 }
857 
858 static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
859 {
860 	uint32_t seq = ring->fence_drv.sync_seq;
861 	uint64_t addr = ring->fence_drv.gpu_addr;
862 
863 	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
864 	amdgpu_ring_write(ring, lower_32_bits(addr));
865 	amdgpu_ring_write(ring, upper_32_bits(addr));
866 	amdgpu_ring_write(ring, seq);
867 }
868 
869 static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
870 	.name = "vce_v3_0",
871 	.early_init = vce_v3_0_early_init,
872 	.late_init = NULL,
873 	.sw_init = vce_v3_0_sw_init,
874 	.sw_fini = vce_v3_0_sw_fini,
875 	.hw_init = vce_v3_0_hw_init,
876 	.hw_fini = vce_v3_0_hw_fini,
877 	.suspend = vce_v3_0_suspend,
878 	.resume = vce_v3_0_resume,
879 	.is_idle = vce_v3_0_is_idle,
880 	.wait_for_idle = vce_v3_0_wait_for_idle,
881 	.check_soft_reset = vce_v3_0_check_soft_reset,
882 	.pre_soft_reset = vce_v3_0_pre_soft_reset,
883 	.soft_reset = vce_v3_0_soft_reset,
884 	.post_soft_reset = vce_v3_0_post_soft_reset,
885 	.set_clockgating_state = vce_v3_0_set_clockgating_state,
886 	.set_powergating_state = vce_v3_0_set_powergating_state,
887 	.get_clockgating_state = vce_v3_0_get_clockgating_state,
888 };
889 
890 static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
891 	.type = AMDGPU_RING_TYPE_VCE,
892 	.align_mask = 0xf,
893 	.nop = VCE_CMD_NO_OP,
894 	.support_64bit_ptrs = false,
895 	.get_rptr = vce_v3_0_ring_get_rptr,
896 	.get_wptr = vce_v3_0_ring_get_wptr,
897 	.set_wptr = vce_v3_0_ring_set_wptr,
898 	.parse_cs = amdgpu_vce_ring_parse_cs,
899 	.emit_frame_size =
900 		4 + /* vce_v3_0_emit_pipeline_sync */
901 		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
902 	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
903 	.emit_ib = amdgpu_vce_ring_emit_ib,
904 	.emit_fence = amdgpu_vce_ring_emit_fence,
905 	.test_ring = amdgpu_vce_ring_test_ring,
906 	.test_ib = amdgpu_vce_ring_test_ib,
907 	.insert_nop = amdgpu_ring_insert_nop,
908 	.pad_ib = amdgpu_ring_generic_pad_ib,
909 	.begin_use = amdgpu_vce_ring_begin_use,
910 	.end_use = amdgpu_vce_ring_end_use,
911 };
912 
913 static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
914 	.type = AMDGPU_RING_TYPE_VCE,
915 	.align_mask = 0xf,
916 	.nop = VCE_CMD_NO_OP,
917 	.support_64bit_ptrs = false,
918 	.get_rptr = vce_v3_0_ring_get_rptr,
919 	.get_wptr = vce_v3_0_ring_get_wptr,
920 	.set_wptr = vce_v3_0_ring_set_wptr,
921 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
922 	.emit_frame_size =
923 		6 + /* vce_v3_0_emit_vm_flush */
924 		4 + /* vce_v3_0_emit_pipeline_sync */
925 		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
926 	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
927 	.emit_ib = vce_v3_0_ring_emit_ib,
928 	.emit_vm_flush = vce_v3_0_emit_vm_flush,
929 	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
930 	.emit_fence = amdgpu_vce_ring_emit_fence,
931 	.test_ring = amdgpu_vce_ring_test_ring,
932 	.test_ib = amdgpu_vce_ring_test_ib,
933 	.insert_nop = amdgpu_ring_insert_nop,
934 	.pad_ib = amdgpu_ring_generic_pad_ib,
935 	.begin_use = amdgpu_vce_ring_begin_use,
936 	.end_use = amdgpu_vce_ring_end_use,
937 };
938 
939 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
940 {
941 	int i;
942 
943 	if (adev->asic_type >= CHIP_STONEY) {
944 		for (i = 0; i < adev->vce.num_rings; i++)
945 			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
946 		DRM_INFO("VCE enabled in VM mode\n");
947 	} else {
948 		for (i = 0; i < adev->vce.num_rings; i++)
949 			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
950 		DRM_INFO("VCE enabled in physical mode\n");
951 	}
952 }
953 
954 static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
955 	.set = vce_v3_0_set_interrupt_state,
956 	.process = vce_v3_0_process_interrupt,
957 };
958 
959 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
960 {
961 	adev->vce.irq.num_types = 1;
962 	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
963 };
964 
965 const struct amdgpu_ip_block_version vce_v3_0_ip_block =
966 {
967 	.type = AMD_IP_BLOCK_TYPE_VCE,
968 	.major = 3,
969 	.minor = 0,
970 	.rev = 0,
971 	.funcs = &vce_v3_0_ip_funcs,
972 };
973 
974 const struct amdgpu_ip_block_version vce_v3_1_ip_block =
975 {
976 	.type = AMD_IP_BLOCK_TYPE_VCE,
977 	.major = 3,
978 	.minor = 1,
979 	.rev = 0,
980 	.funcs = &vce_v3_0_ip_funcs,
981 };
982 
983 const struct amdgpu_ip_block_version vce_v3_4_ip_block =
984 {
985 	.type = AMD_IP_BLOCK_TYPE_VCE,
986 	.major = 3,
987 	.minor = 4,
988 	.rev = 0,
989 	.funcs = &vce_v3_0_ip_funcs,
990 };
991