xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision 5fa1f7680f2728d62561db6d4a9282c4d21f2324)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15.h"
32 #include "soc15d.h"
33 #include "soc15_common.h"
34 #include "mmsch_v1_0.h"
35 
36 #include "vce/vce_4_0_offset.h"
37 #include "vce/vce_4_0_default.h"
38 #include "vce/vce_4_0_sh_mask.h"
39 #include "mmhub/mmhub_1_0_offset.h"
40 #include "mmhub/mmhub_1_0_sh_mask.h"
41 
42 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
43 
44 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
45 
46 #define VCE_V4_0_FW_SIZE	(384 * 1024)
47 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
48 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
49 
50 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
51 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
52 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
53 
54 /**
55  * vce_v4_0_ring_get_rptr - get read pointer
56  *
57  * @ring: amdgpu_ring pointer
58  *
59  * Returns the current hardware read pointer
60  */
61 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
62 {
63 	struct amdgpu_device *adev = ring->adev;
64 
65 	if (ring->me == 0)
66 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
67 	else if (ring->me == 1)
68 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
69 	else
70 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
71 }
72 
73 /**
74  * vce_v4_0_ring_get_wptr - get write pointer
75  *
76  * @ring: amdgpu_ring pointer
77  *
78  * Returns the current hardware write pointer
79  */
80 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
81 {
82 	struct amdgpu_device *adev = ring->adev;
83 
84 	if (ring->use_doorbell)
85 		return adev->wb.wb[ring->wptr_offs];
86 
87 	if (ring->me == 0)
88 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
89 	else if (ring->me == 1)
90 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
91 	else
92 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
93 }
94 
95 /**
96  * vce_v4_0_ring_set_wptr - set write pointer
97  *
98  * @ring: amdgpu_ring pointer
99  *
100  * Commits the write pointer to the hardware
101  */
102 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
103 {
104 	struct amdgpu_device *adev = ring->adev;
105 
106 	if (ring->use_doorbell) {
107 		/* XXX check if swapping is necessary on BE */
108 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
109 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
110 		return;
111 	}
112 
113 	if (ring->me == 0)
114 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
115 			lower_32_bits(ring->wptr));
116 	else if (ring->me == 1)
117 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
118 			lower_32_bits(ring->wptr));
119 	else
120 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
121 			lower_32_bits(ring->wptr));
122 }
123 
124 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
125 {
126 	int i, j;
127 
128 	for (i = 0; i < 10; ++i) {
129 		for (j = 0; j < 100; ++j) {
130 			uint32_t status =
131 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
132 
133 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
134 				return 0;
135 			mdelay(10);
136 		}
137 
138 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
139 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
140 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
141 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
142 		mdelay(10);
143 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
144 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
145 		mdelay(10);
146 
147 	}
148 
149 	return -ETIMEDOUT;
150 }
151 
152 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
153 				struct amdgpu_mm_table *table)
154 {
155 	uint32_t data = 0, loop;
156 	uint64_t addr = table->gpu_addr;
157 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
158 	uint32_t size;
159 
160 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
161 
162 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
163 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
164 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
165 
166 	/* 2, update vmid of descriptor */
167 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
168 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
169 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
170 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
171 
172 	/* 3, notify mmsch about the size of this descriptor */
173 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
174 
175 	/* 4, set resp to zero */
176 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
177 
178 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
179 	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
180 	adev->vce.ring[0].wptr = 0;
181 	adev->vce.ring[0].wptr_old = 0;
182 
183 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
184 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
185 
186 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
187 	loop = 1000;
188 	while ((data & 0x10000002) != 0x10000002) {
189 		udelay(10);
190 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
191 		loop--;
192 		if (!loop)
193 			break;
194 	}
195 
196 	if (!loop) {
197 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
198 		return -EBUSY;
199 	}
200 
201 	return 0;
202 }
203 
204 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
205 {
206 	struct amdgpu_ring *ring;
207 	uint32_t offset, size;
208 	uint32_t table_size = 0;
209 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
210 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
211 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
212 	struct mmsch_v1_0_cmd_end end = { { 0 } };
213 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
214 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
215 
216 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
217 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
218 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
219 	end.cmd_header.command_type = MMSCH_COMMAND__END;
220 
221 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
222 		header->version = MMSCH_VERSION;
223 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
224 
225 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
226 			header->vce_table_offset = header->header_size;
227 		else
228 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
229 
230 		init_table += header->vce_table_offset;
231 
232 		ring = &adev->vce.ring[0];
233 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
234 					    lower_32_bits(ring->gpu_addr));
235 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
236 					    upper_32_bits(ring->gpu_addr));
237 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
238 					    ring->ring_size / 4);
239 
240 		/* BEGING OF MC_RESUME */
241 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
242 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
243 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
244 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
245 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
246 
247 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
248 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
249 			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
250 			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
251 			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
252 
253 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
254 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
255 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
256 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
257 						(tmr_mc_addr >> 40) & 0xff);
258 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
259 		} else {
260 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
261 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
262 						adev->vce.gpu_addr >> 8);
263 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
264 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
265 						(adev->vce.gpu_addr >> 40) & 0xff);
266 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
267 						offset & ~0x0f000000);
268 
269 		}
270 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
271 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
272 						adev->vce.gpu_addr >> 8);
273 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
274 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
275 						(adev->vce.gpu_addr >> 40) & 0xff);
276 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
277 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
278 						adev->vce.gpu_addr >> 8);
279 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
280 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
281 						(adev->vce.gpu_addr >> 40) & 0xff);
282 
283 		size = VCE_V4_0_FW_SIZE;
284 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
285 
286 		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
287 		size = VCE_V4_0_STACK_SIZE;
288 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
289 					(offset & ~0x0f000000) | (1 << 24));
290 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
291 
292 		offset += size;
293 		size = VCE_V4_0_DATA_SIZE;
294 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
295 					(offset & ~0x0f000000) | (2 << 24));
296 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
297 
298 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
299 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
300 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
301 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
302 
303 		/* end of MC_RESUME */
304 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
305 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
306 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
307 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
308 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
309 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
310 
311 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
312 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
313 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
314 
315 		/* clear BUSY flag */
316 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
317 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
318 
319 		/* add end packet */
320 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
321 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
322 		header->vce_table_size = table_size;
323 	}
324 
325 	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
326 }
327 
328 /**
329  * vce_v4_0_start - start VCE block
330  *
331  * @adev: amdgpu_device pointer
332  *
333  * Setup and start the VCE block
334  */
335 static int vce_v4_0_start(struct amdgpu_device *adev)
336 {
337 	struct amdgpu_ring *ring;
338 	int r;
339 
340 	ring = &adev->vce.ring[0];
341 
342 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
343 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
344 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
345 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
346 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
347 
348 	ring = &adev->vce.ring[1];
349 
350 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
351 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
352 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
353 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
354 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
355 
356 	ring = &adev->vce.ring[2];
357 
358 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
359 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
360 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
361 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
362 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
363 
364 	vce_v4_0_mc_resume(adev);
365 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
366 			~VCE_STATUS__JOB_BUSY_MASK);
367 
368 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
369 
370 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
371 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
372 	mdelay(100);
373 
374 	r = vce_v4_0_firmware_loaded(adev);
375 
376 	/* clear BUSY flag */
377 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
378 
379 	if (r) {
380 		DRM_ERROR("VCE not responding, giving up!!!\n");
381 		return r;
382 	}
383 
384 	return 0;
385 }
386 
387 static int vce_v4_0_stop(struct amdgpu_device *adev)
388 {
389 
390 	/* Disable VCPU */
391 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
392 
393 	/* hold on ECPU */
394 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
395 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
396 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
397 
398 	/* clear VCE_STATUS */
399 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
400 
401 	/* Set Clock-Gating off */
402 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
403 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
404 	*/
405 
406 	return 0;
407 }
408 
409 static int vce_v4_0_early_init(void *handle)
410 {
411 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
412 
413 	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
414 		adev->vce.num_rings = 1;
415 	else
416 		adev->vce.num_rings = 3;
417 
418 	vce_v4_0_set_ring_funcs(adev);
419 	vce_v4_0_set_irq_funcs(adev);
420 
421 	return 0;
422 }
423 
424 static int vce_v4_0_sw_init(void *handle)
425 {
426 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
427 	struct amdgpu_ring *ring;
428 
429 	unsigned size;
430 	int r, i;
431 
432 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
433 	if (r)
434 		return r;
435 
436 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
437 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
438 		size += VCE_V4_0_FW_SIZE;
439 
440 	r = amdgpu_vce_sw_init(adev, size);
441 	if (r)
442 		return r;
443 
444 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
445 		const struct common_firmware_header *hdr;
446 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
447 
448 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
449 		if (!adev->vce.saved_bo)
450 			return -ENOMEM;
451 
452 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
453 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
454 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
455 		adev->firmware.fw_size +=
456 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
457 		DRM_INFO("PSP loading VCE firmware\n");
458 	} else {
459 		r = amdgpu_vce_resume(adev);
460 		if (r)
461 			return r;
462 	}
463 
464 	for (i = 0; i < adev->vce.num_rings; i++) {
465 		ring = &adev->vce.ring[i];
466 		sprintf(ring->name, "vce%d", i);
467 		if (amdgpu_sriov_vf(adev)) {
468 			/* DOORBELL only works under SRIOV */
469 			ring->use_doorbell = true;
470 
471 			/* currently only use the first encoding ring for sriov,
472 			 * so set unused location for other unused rings.
473 			 */
474 			if (i == 0)
475 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
476 			else
477 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
478 		}
479 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
480 				     AMDGPU_RING_PRIO_DEFAULT);
481 		if (r)
482 			return r;
483 	}
484 
485 
486 	r = amdgpu_vce_entity_init(adev);
487 	if (r)
488 		return r;
489 
490 	r = amdgpu_virt_alloc_mm_table(adev);
491 	if (r)
492 		return r;
493 
494 	return r;
495 }
496 
497 static int vce_v4_0_sw_fini(void *handle)
498 {
499 	int r;
500 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
501 
502 	/* free MM table */
503 	amdgpu_virt_free_mm_table(adev);
504 
505 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
506 		kvfree(adev->vce.saved_bo);
507 		adev->vce.saved_bo = NULL;
508 	}
509 
510 	r = amdgpu_vce_suspend(adev);
511 	if (r)
512 		return r;
513 
514 	return amdgpu_vce_sw_fini(adev);
515 }
516 
517 static int vce_v4_0_hw_init(void *handle)
518 {
519 	int r, i;
520 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
521 
522 	if (amdgpu_sriov_vf(adev))
523 		r = vce_v4_0_sriov_start(adev);
524 	else
525 		r = vce_v4_0_start(adev);
526 	if (r)
527 		return r;
528 
529 	for (i = 0; i < adev->vce.num_rings; i++) {
530 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
531 		if (r)
532 			return r;
533 	}
534 
535 	DRM_INFO("VCE initialized successfully.\n");
536 
537 	return 0;
538 }
539 
/**
 * vce_v4_0_hw_fini - stop the hardware
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Stops the block on bare metal.  For an SR-IOV VF no VCE register is
 * touched and only a debug message is emitted.
 *
 * Always returns 0.
 */
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	/* no wait-for-idle is performed before stopping */
	vce_v4_0_stop(adev);

	return 0;
}
554 
555 static int vce_v4_0_suspend(void *handle)
556 {
557 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
558 	int r;
559 
560 	if (adev->vce.vcpu_bo == NULL)
561 		return 0;
562 
563 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
564 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
565 		void *ptr = adev->vce.cpu_addr;
566 
567 		memcpy_fromio(adev->vce.saved_bo, ptr, size);
568 	}
569 
570 	r = vce_v4_0_hw_fini(adev);
571 	if (r)
572 		return r;
573 
574 	return amdgpu_vce_suspend(adev);
575 }
576 
577 static int vce_v4_0_resume(void *handle)
578 {
579 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
580 	int r;
581 
582 	if (adev->vce.vcpu_bo == NULL)
583 		return -EINVAL;
584 
585 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
586 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
587 		void *ptr = adev->vce.cpu_addr;
588 
589 		memcpy_toio(ptr, adev->vce.saved_bo, size);
590 	} else {
591 		r = amdgpu_vce_resume(adev);
592 		if (r)
593 			return r;
594 	}
595 
596 	return vce_v4_0_hw_init(adev);
597 }
598 
599 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
600 {
601 	uint32_t offset, size;
602 	uint64_t tmr_mc_addr;
603 
604 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
605 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
606 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
607 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
608 
609 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
610 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
611 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
612 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
613 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
614 
615 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
616 
617 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
618 		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
619 										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
620 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
621 			(tmr_mc_addr >> 8));
622 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
623 			(tmr_mc_addr >> 40) & 0xff);
624 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
625 	} else {
626 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
627 			(adev->vce.gpu_addr >> 8));
628 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
629 			(adev->vce.gpu_addr >> 40) & 0xff);
630 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
631 	}
632 
633 	size = VCE_V4_0_FW_SIZE;
634 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
635 
636 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
637 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
638 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
639 	size = VCE_V4_0_STACK_SIZE;
640 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
641 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
642 
643 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
644 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
645 	offset += size;
646 	size = VCE_V4_0_DATA_SIZE;
647 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
648 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
649 
650 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
651 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
652 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
653 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
654 }
655 
656 static int vce_v4_0_set_clockgating_state(void *handle,
657 					  enum amd_clockgating_state state)
658 {
659 	/* needed for driver unload*/
660 	return 0;
661 }
662 
663 #if 0
664 static bool vce_v4_0_is_idle(void *handle)
665 {
666 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
667 	u32 mask = 0;
668 
669 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
670 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
671 
672 	return !(RREG32(mmSRBM_STATUS2) & mask);
673 }
674 
675 static int vce_v4_0_wait_for_idle(void *handle)
676 {
677 	unsigned i;
678 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
679 
680 	for (i = 0; i < adev->usec_timeout; i++)
681 		if (vce_v4_0_is_idle(handle))
682 			return 0;
683 
684 	return -ETIMEDOUT;
685 }
686 
687 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
688 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
689 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
690 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
691 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
692 
693 static bool vce_v4_0_check_soft_reset(void *handle)
694 {
695 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
696 	u32 srbm_soft_reset = 0;
697 
698 	/* According to VCE team , we should use VCE_STATUS instead
699 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
700 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
701 	 * instance's registers are accessed
702 	 * (0 for 1st instance, 10 for 2nd instance).
703 	 *
704 	 *VCE_STATUS
705 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
706 	 *|----+----+-----------+----+----+----+----------+---------+----|
707 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
708 	 *
709 	 * VCE team suggest use bit 3--bit 6 for busy status check
710 	 */
711 	mutex_lock(&adev->grbm_idx_mutex);
712 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
713 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
714 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
715 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
716 	}
717 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
718 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
719 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
720 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
721 	}
722 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
723 	mutex_unlock(&adev->grbm_idx_mutex);
724 
725 	if (srbm_soft_reset) {
726 		adev->vce.srbm_soft_reset = srbm_soft_reset;
727 		return true;
728 	} else {
729 		adev->vce.srbm_soft_reset = 0;
730 		return false;
731 	}
732 }
733 
734 static int vce_v4_0_soft_reset(void *handle)
735 {
736 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
737 	u32 srbm_soft_reset;
738 
739 	if (!adev->vce.srbm_soft_reset)
740 		return 0;
741 	srbm_soft_reset = adev->vce.srbm_soft_reset;
742 
743 	if (srbm_soft_reset) {
744 		u32 tmp;
745 
746 		tmp = RREG32(mmSRBM_SOFT_RESET);
747 		tmp |= srbm_soft_reset;
748 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
749 		WREG32(mmSRBM_SOFT_RESET, tmp);
750 		tmp = RREG32(mmSRBM_SOFT_RESET);
751 
752 		udelay(50);
753 
754 		tmp &= ~srbm_soft_reset;
755 		WREG32(mmSRBM_SOFT_RESET, tmp);
756 		tmp = RREG32(mmSRBM_SOFT_RESET);
757 
758 		/* Wait a little for things to settle down */
759 		udelay(50);
760 	}
761 
762 	return 0;
763 }
764 
765 static int vce_v4_0_pre_soft_reset(void *handle)
766 {
767 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
768 
769 	if (!adev->vce.srbm_soft_reset)
770 		return 0;
771 
772 	mdelay(5);
773 
774 	return vce_v4_0_suspend(adev);
775 }
776 
777 
778 static int vce_v4_0_post_soft_reset(void *handle)
779 {
780 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
781 
782 	if (!adev->vce.srbm_soft_reset)
783 		return 0;
784 
785 	mdelay(5);
786 
787 	return vce_v4_0_resume(adev);
788 }
789 
790 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
791 {
792 	u32 tmp, data;
793 
794 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
795 	if (override)
796 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
797 	else
798 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
799 
800 	if (tmp != data)
801 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
802 }
803 
804 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
805 					     bool gated)
806 {
807 	u32 data;
808 
809 	/* Set Override to disable Clock Gating */
810 	vce_v4_0_override_vce_clock_gating(adev, true);
811 
812 	/* This function enables MGCG which is controlled by firmware.
813 	   With the clocks in the gated state the core is still
814 	   accessible but the firmware will throttle the clocks on the
815 	   fly as necessary.
816 	*/
817 	if (gated) {
818 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
819 		data |= 0x1ff;
820 		data &= ~0xef0000;
821 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
822 
823 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
824 		data |= 0x3ff000;
825 		data &= ~0xffc00000;
826 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
827 
828 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
829 		data |= 0x2;
830 		data &= ~0x00010000;
831 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
832 
833 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
834 		data |= 0x37f;
835 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
836 
837 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
838 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
839 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
840 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
841 			0x8;
842 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
843 	} else {
844 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
845 		data &= ~0x80010;
846 		data |= 0xe70008;
847 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
848 
849 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
850 		data |= 0xffc00000;
851 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
852 
853 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
854 		data |= 0x10000;
855 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
856 
857 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
858 		data &= ~0xffc00000;
859 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
860 
861 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
862 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
863 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
864 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
865 			  0x8);
866 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
867 	}
868 	vce_v4_0_override_vce_clock_gating(adev, false);
869 }
870 
871 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
872 {
873 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
874 
875 	if (enable)
876 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
877 	else
878 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
879 
880 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
881 }
882 
883 static int vce_v4_0_set_clockgating_state(void *handle,
884 					  enum amd_clockgating_state state)
885 {
886 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
887 	bool enable = (state == AMD_CG_STATE_GATE);
888 	int i;
889 
890 	if ((adev->asic_type == CHIP_POLARIS10) ||
891 		(adev->asic_type == CHIP_TONGA) ||
892 		(adev->asic_type == CHIP_FIJI))
893 		vce_v4_0_set_bypass_mode(adev, enable);
894 
895 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
896 		return 0;
897 
898 	mutex_lock(&adev->grbm_idx_mutex);
899 	for (i = 0; i < 2; i++) {
900 		/* Program VCE Instance 0 or 1 if not harvested */
901 		if (adev->vce.harvest_config & (1 << i))
902 			continue;
903 
904 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
905 
906 		if (enable) {
907 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
908 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
909 			data &= ~(0xf | 0xff0);
910 			data |= ((0x0 << 0) | (0x04 << 4));
911 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
912 
913 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
914 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
915 			data &= ~(0xf | 0xff0);
916 			data |= ((0x0 << 0) | (0x04 << 4));
917 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
918 		}
919 
920 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
921 	}
922 
923 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
924 	mutex_unlock(&adev->grbm_idx_mutex);
925 
926 	return 0;
927 }
928 #endif
929 
930 static int vce_v4_0_set_powergating_state(void *handle,
931 					  enum amd_powergating_state state)
932 {
933 	/* This doesn't actually powergate the VCE block.
934 	 * That's done in the dpm code via the SMC.  This
935 	 * just re-inits the block as necessary.  The actual
936 	 * gating still happens in the dpm code.  We should
937 	 * revisit this when there is a cleaner line between
938 	 * the smc and the hw blocks
939 	 */
940 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
941 
942 	if (state == AMD_PG_STATE_GATE)
943 		return vce_v4_0_stop(adev);
944 	else
945 		return vce_v4_0_start(adev);
946 }
947 
948 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
949 					struct amdgpu_ib *ib, uint32_t flags)
950 {
951 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
952 
953 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
954 	amdgpu_ring_write(ring, vmid);
955 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
956 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
957 	amdgpu_ring_write(ring, ib->length_dw);
958 }
959 
960 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
961 			u64 seq, unsigned flags)
962 {
963 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
964 
965 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
966 	amdgpu_ring_write(ring, addr);
967 	amdgpu_ring_write(ring, upper_32_bits(addr));
968 	amdgpu_ring_write(ring, seq);
969 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
970 }
971 
972 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
973 {
974 	amdgpu_ring_write(ring, VCE_CMD_END);
975 }
976 
977 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
978 				   uint32_t val, uint32_t mask)
979 {
980 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
981 	amdgpu_ring_write(ring,	reg << 2);
982 	amdgpu_ring_write(ring, mask);
983 	amdgpu_ring_write(ring, val);
984 }
985 
986 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
987 				   unsigned int vmid, uint64_t pd_addr)
988 {
989 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
990 
991 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
992 
993 	/* wait for reg writes */
994 	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
995 			       vmid * hub->ctx_addr_distance,
996 			       lower_32_bits(pd_addr), 0xffffffff);
997 }
998 
999 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1000 			       uint32_t reg, uint32_t val)
1001 {
1002 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1003 	amdgpu_ring_write(ring,	reg << 2);
1004 	amdgpu_ring_write(ring, val);
1005 }
1006 
1007 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1008 					struct amdgpu_irq_src *source,
1009 					unsigned type,
1010 					enum amdgpu_interrupt_state state)
1011 {
1012 	uint32_t val = 0;
1013 
1014 	if (!amdgpu_sriov_vf(adev)) {
1015 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1016 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1017 
1018 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1019 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1020 	}
1021 	return 0;
1022 }
1023 
1024 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1025 				      struct amdgpu_irq_src *source,
1026 				      struct amdgpu_iv_entry *entry)
1027 {
1028 	DRM_DEBUG("IH: VCE\n");
1029 
1030 	switch (entry->src_data[0]) {
1031 	case 0:
1032 	case 1:
1033 	case 2:
1034 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1035 		break;
1036 	default:
1037 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1038 			  entry->src_id, entry->src_data[0]);
1039 		break;
1040 	}
1041 
1042 	return 0;
1043 }
1044 
1045 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1046 	.name = "vce_v4_0",
1047 	.early_init = vce_v4_0_early_init,
1048 	.late_init = NULL,
1049 	.sw_init = vce_v4_0_sw_init,
1050 	.sw_fini = vce_v4_0_sw_fini,
1051 	.hw_init = vce_v4_0_hw_init,
1052 	.hw_fini = vce_v4_0_hw_fini,
1053 	.suspend = vce_v4_0_suspend,
1054 	.resume = vce_v4_0_resume,
1055 	.is_idle = NULL /* vce_v4_0_is_idle */,
1056 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1057 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1058 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1059 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1060 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1061 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1062 	.set_powergating_state = vce_v4_0_set_powergating_state,
1063 };
1064 
1065 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1066 	.type = AMDGPU_RING_TYPE_VCE,
1067 	.align_mask = 0x3f,
1068 	.nop = VCE_CMD_NO_OP,
1069 	.support_64bit_ptrs = false,
1070 	.no_user_fence = true,
1071 	.vmhub = AMDGPU_MMHUB_0,
1072 	.get_rptr = vce_v4_0_ring_get_rptr,
1073 	.get_wptr = vce_v4_0_ring_get_wptr,
1074 	.set_wptr = vce_v4_0_ring_set_wptr,
1075 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1076 	.emit_frame_size =
1077 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1078 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1079 		4 + /* vce_v4_0_emit_vm_flush */
1080 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1081 		1, /* vce_v4_0_ring_insert_end */
1082 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1083 	.emit_ib = vce_v4_0_ring_emit_ib,
1084 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1085 	.emit_fence = vce_v4_0_ring_emit_fence,
1086 	.test_ring = amdgpu_vce_ring_test_ring,
1087 	.test_ib = amdgpu_vce_ring_test_ib,
1088 	.insert_nop = amdgpu_ring_insert_nop,
1089 	.insert_end = vce_v4_0_ring_insert_end,
1090 	.pad_ib = amdgpu_ring_generic_pad_ib,
1091 	.begin_use = amdgpu_vce_ring_begin_use,
1092 	.end_use = amdgpu_vce_ring_end_use,
1093 	.emit_wreg = vce_v4_0_emit_wreg,
1094 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
1095 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1096 };
1097 
1098 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1099 {
1100 	int i;
1101 
1102 	for (i = 0; i < adev->vce.num_rings; i++) {
1103 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1104 		adev->vce.ring[i].me = i;
1105 	}
1106 	DRM_INFO("VCE enabled in VM mode\n");
1107 }
1108 
1109 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1110 	.set = vce_v4_0_set_interrupt_state,
1111 	.process = vce_v4_0_process_interrupt,
1112 };
1113 
1114 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1115 {
1116 	adev->vce.irq.num_types = 1;
1117 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1118 };
1119 
1120 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1121 {
1122 	.type = AMD_IP_BLOCK_TYPE_VCE,
1123 	.major = 4,
1124 	.minor = 0,
1125 	.rev = 0,
1126 	.funcs = &vce_v4_0_ip_funcs,
1127 };
1128