xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision 1675c3a2)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15.h"
32 #include "soc15d.h"
33 #include "soc15_common.h"
34 #include "mmsch_v1_0.h"
35 
36 #include "vce/vce_4_0_offset.h"
37 #include "vce/vce_4_0_default.h"
38 #include "vce/vce_4_0_sh_mask.h"
39 #include "mmhub/mmhub_1_0_offset.h"
40 #include "mmhub/mmhub_1_0_sh_mask.h"
41 
42 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
43 
44 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
45 
46 #define VCE_V4_0_FW_SIZE	(384 * 1024)
47 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
48 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
49 
50 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
51 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
52 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
53 
54 /**
55  * vce_v4_0_ring_get_rptr - get read pointer
56  *
57  * @ring: amdgpu_ring pointer
58  *
59  * Returns the current hardware read pointer
60  */
61 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
62 {
63 	struct amdgpu_device *adev = ring->adev;
64 
65 	if (ring->me == 0)
66 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
67 	else if (ring->me == 1)
68 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
69 	else
70 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
71 }
72 
73 /**
74  * vce_v4_0_ring_get_wptr - get write pointer
75  *
76  * @ring: amdgpu_ring pointer
77  *
78  * Returns the current hardware write pointer
79  */
80 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
81 {
82 	struct amdgpu_device *adev = ring->adev;
83 
84 	if (ring->use_doorbell)
85 		return adev->wb.wb[ring->wptr_offs];
86 
87 	if (ring->me == 0)
88 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
89 	else if (ring->me == 1)
90 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
91 	else
92 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
93 }
94 
95 /**
96  * vce_v4_0_ring_set_wptr - set write pointer
97  *
98  * @ring: amdgpu_ring pointer
99  *
100  * Commits the write pointer to the hardware
101  */
102 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
103 {
104 	struct amdgpu_device *adev = ring->adev;
105 
106 	if (ring->use_doorbell) {
107 		/* XXX check if swapping is necessary on BE */
108 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
109 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
110 		return;
111 	}
112 
113 	if (ring->me == 0)
114 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
115 			lower_32_bits(ring->wptr));
116 	else if (ring->me == 1)
117 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
118 			lower_32_bits(ring->wptr));
119 	else
120 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
121 			lower_32_bits(ring->wptr));
122 }
123 
124 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
125 {
126 	int i, j;
127 
128 	for (i = 0; i < 10; ++i) {
129 		for (j = 0; j < 100; ++j) {
130 			uint32_t status =
131 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
132 
133 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
134 				return 0;
135 			mdelay(10);
136 		}
137 
138 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
139 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
140 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
141 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
142 		mdelay(10);
143 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
144 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
145 		mdelay(10);
146 
147 	}
148 
149 	return -ETIMEDOUT;
150 }
151 
/*
 * vce_v4_0_mmsch_start - hand the init descriptor table to the MM scheduler
 *
 * @adev: amdgpu_device pointer
 * @table: MM table holding the mmsch_v1_0 init header and engine tables
 *
 * Programs the MMSCH mailbox registers with the table location/size and
 * waits for the scheduler to acknowledge initialization.
 * Returns 0 on success, -EBUSY if the mailbox response never arrives.
 */
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	/* total descriptor size in dwords: header plus both engine tables */
	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* reset ring 0 doorbell/writeback state before the scheduler takes over */
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	/* poll for completion: up to 1000 * 10us */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
203 
/*
 * vce_v4_0_sriov_start - start VCE under SRIOV via the MM scheduler
 *
 * @adev: amdgpu_device pointer
 *
 * Builds (once) the MMSCH v1.0 init table describing the register writes,
 * read-modify-writes and polls the scheduler must perform to bring up VCE,
 * then hands the table to the scheduler with vce_v4_0_mmsch_start().
 *
 * NOTE: the MMSCH_V1_0_INSERT_* macros implicitly use the init_table,
 * table_size, direct_wt/direct_rd_mod_wt/direct_poll locals declared below,
 * so the surrounding variable names must not change.
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* only build the VCE table the first time through */
	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* place the VCE table after the UVD table if one exists */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			/* PSP loaded the firmware into the TMR region; point the
			 * VCPU cache BAR0 at that address instead of our BO
			 */
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		/* stack and data segments always live in the driver's VCE BO */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* with PSP load the fw is not in the BO, so stack starts at 0 */
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* have the scheduler wait for the firmware-loaded report */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}
327 
/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block: program all three ring buffers,
 * restore the memory controller setup, then enable the VCPU and
 * release the ECPU from soft reset, waiting for the firmware to
 * report itself loaded.
 *
 * Returns 0 on success, the error from vce_v4_0_firmware_loaded()
 * otherwise.
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	/* program ring buffer registers for ring 0 */
	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	/* ring 1 */
	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	/* ring 2 */
	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	/* mark the block busy while the VCPU comes up */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	/* release ECPU soft reset and give the firmware time to boot */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}
386 
/*
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU, holds the ECPU in soft reset and clears the
 * status register. Always returns 0.
 */
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}
408 
409 static int vce_v4_0_early_init(void *handle)
410 {
411 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
412 
413 	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
414 		adev->vce.num_rings = 1;
415 	else
416 		adev->vce.num_rings = 3;
417 
418 	vce_v4_0_set_ring_funcs(adev);
419 	vce_v4_0_set_irq_funcs(adev);
420 
421 	return 0;
422 }
423 
424 static int vce_v4_0_sw_init(void *handle)
425 {
426 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
427 	struct amdgpu_ring *ring;
428 
429 	unsigned size;
430 	int r, i;
431 
432 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
433 	if (r)
434 		return r;
435 
436 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
437 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
438 		size += VCE_V4_0_FW_SIZE;
439 
440 	r = amdgpu_vce_sw_init(adev, size);
441 	if (r)
442 		return r;
443 
444 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
445 		const struct common_firmware_header *hdr;
446 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
447 
448 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
449 		if (!adev->vce.saved_bo)
450 			return -ENOMEM;
451 
452 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
453 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
454 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
455 		adev->firmware.fw_size +=
456 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
457 		DRM_INFO("PSP loading VCE firmware\n");
458 	} else {
459 		r = amdgpu_vce_resume(adev);
460 		if (r)
461 			return r;
462 	}
463 
464 	for (i = 0; i < adev->vce.num_rings; i++) {
465 		ring = &adev->vce.ring[i];
466 		sprintf(ring->name, "vce%d", i);
467 		if (amdgpu_sriov_vf(adev)) {
468 			/* DOORBELL only works under SRIOV */
469 			ring->use_doorbell = true;
470 
471 			/* currently only use the first encoding ring for sriov,
472 			 * so set unused location for other unused rings.
473 			 */
474 			if (i == 0)
475 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
476 			else
477 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
478 		}
479 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
480 		if (r)
481 			return r;
482 	}
483 
484 
485 	r = amdgpu_vce_entity_init(adev);
486 	if (r)
487 		return r;
488 
489 	r = amdgpu_virt_alloc_mm_table(adev);
490 	if (r)
491 		return r;
492 
493 	return r;
494 }
495 
496 static int vce_v4_0_sw_fini(void *handle)
497 {
498 	int r;
499 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
500 
501 	/* free MM table */
502 	amdgpu_virt_free_mm_table(adev);
503 
504 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
505 		kvfree(adev->vce.saved_bo);
506 		adev->vce.saved_bo = NULL;
507 	}
508 
509 	r = amdgpu_vce_suspend(adev);
510 	if (r)
511 		return r;
512 
513 	return amdgpu_vce_sw_fini(adev);
514 }
515 
516 static int vce_v4_0_hw_init(void *handle)
517 {
518 	int r, i;
519 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
520 
521 	if (amdgpu_sriov_vf(adev))
522 		r = vce_v4_0_sriov_start(adev);
523 	else
524 		r = vce_v4_0_start(adev);
525 	if (r)
526 		return r;
527 
528 	for (i = 0; i < adev->vce.num_rings; i++) {
529 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
530 		if (r)
531 			return r;
532 	}
533 
534 	DRM_INFO("VCE initialized successfully.\n");
535 
536 	return 0;
537 }
538 
/*
 * vce_v4_0_hw_fini - hardware teardown callback
 *
 * @handle: amdgpu_device pointer
 *
 * Stops the block on bare metal; under SRIOV the host owns the
 * registers so nothing is touched.
 */
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	/* vce_v4_0_wait_for_idle(handle); */
	vce_v4_0_stop(adev);

	return 0;
}
553 
554 static int vce_v4_0_suspend(void *handle)
555 {
556 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
557 	int r;
558 
559 	if (adev->vce.vcpu_bo == NULL)
560 		return 0;
561 
562 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
563 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
564 		void *ptr = adev->vce.cpu_addr;
565 
566 		memcpy_fromio(adev->vce.saved_bo, ptr, size);
567 	}
568 
569 	r = vce_v4_0_hw_fini(adev);
570 	if (r)
571 		return r;
572 
573 	return amdgpu_vce_suspend(adev);
574 }
575 
576 static int vce_v4_0_resume(void *handle)
577 {
578 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
579 	int r;
580 
581 	if (adev->vce.vcpu_bo == NULL)
582 		return -EINVAL;
583 
584 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
585 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
586 		void *ptr = adev->vce.cpu_addr;
587 
588 		memcpy_toio(ptr, adev->vce.saved_bo, size);
589 	} else {
590 		r = amdgpu_vce_resume(adev);
591 		if (r)
592 			return r;
593 	}
594 
595 	return vce_v4_0_hw_init(adev);
596 }
597 
/*
 * vce_v4_0_mc_resume - program the VCE memory controller
 *
 * @adev: amdgpu_device pointer
 *
 * Configures clock gating defaults, the LMI, and the three VCPU cache
 * windows (firmware, stack, data). When the firmware was loaded by PSP,
 * cache window 0 points at the PSP TMR region instead of the VCE BO.
 */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		/* firmware lives in the PSP TMR; point cache window 0 there */
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	/* stack (window 1) and data (window 2) always live in the VCE BO */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	/* with PSP load the fw is not in the BO, so the stack starts at 0 */
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
654 
/*
 * vce_v4_0_set_clockgating_state - clockgating callback (stub)
 *
 * Intentionally does nothing; the callback must exist so the ip_funcs
 * table is complete for driver unload.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload*/
	return 0;
}
661 
662 #if 0
/*
 * vce_v4_0_is_idle - report whether the non-harvested VCE instances are idle
 *
 * NOTE(review): compiled out (#if 0). References mmSRBM_STATUS2, which
 * is not provided by the soc15 headers included above — confirm register
 * availability before re-enabling.
 */
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}
673 
/*
 * vce_v4_0_wait_for_idle - poll vce_v4_0_is_idle() until idle or timeout
 *
 * NOTE(review): compiled out (#if 0). Busy-polls once per iteration up to
 * adev->usec_timeout times; returns 0 when idle, -ETIMEDOUT otherwise.
 */
static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}
685 
686 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
687 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
688 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
689 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
690 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
691 
692 static bool vce_v4_0_check_soft_reset(void *handle)
693 {
694 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
695 	u32 srbm_soft_reset = 0;
696 
697 	/* According to VCE team , we should use VCE_STATUS instead
698 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
699 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
700 	 * instance's registers are accessed
701 	 * (0 for 1st instance, 10 for 2nd instance).
702 	 *
703 	 *VCE_STATUS
704 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
705 	 *|----+----+-----------+----+----+----+----------+---------+----|
706 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
707 	 *
708 	 * VCE team suggest use bit 3--bit 6 for busy status check
709 	 */
710 	mutex_lock(&adev->grbm_idx_mutex);
711 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
712 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
713 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
714 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
715 	}
716 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
717 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
718 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
719 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
720 	}
721 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
722 	mutex_unlock(&adev->grbm_idx_mutex);
723 
724 	if (srbm_soft_reset) {
725 		adev->vce.srbm_soft_reset = srbm_soft_reset;
726 		return true;
727 	} else {
728 		adev->vce.srbm_soft_reset = 0;
729 		return false;
730 	}
731 }
732 
/*
 * vce_v4_0_soft_reset - pulse the SRBM soft reset bits recorded by
 * vce_v4_0_check_soft_reset()
 *
 * NOTE(review): compiled out (#if 0). Asserts the reset bits, waits,
 * deasserts them and waits again for the block to settle.
 */
static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* deassert the reset bits */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}
763 
/*
 * vce_v4_0_pre_soft_reset - quiesce VCE before a pending soft reset
 *
 * NOTE(review): compiled out (#if 0). Suspends the block only when
 * check_soft_reset flagged a reset.
 */
static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}
775 
776 
/*
 * vce_v4_0_post_soft_reset - bring VCE back after a soft reset
 *
 * NOTE(review): compiled out (#if 0). Resumes the block only when
 * check_soft_reset flagged a reset.
 */
static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}
788 
789 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
790 {
791 	u32 tmp, data;
792 
793 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
794 	if (override)
795 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
796 	else
797 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
798 
799 	if (tmp != data)
800 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
801 }
802 
/*
 * vce_v4_0_set_vce_sw_clock_gating - program firmware-controlled MGCG
 *
 * @adev: amdgpu_device pointer
 * @gated: true to enable clock gating, false to force clocks on
 *
 * NOTE(review): compiled out (#if 0). The override bit is asserted
 * around the register programming so the writes take effect.
 */
static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		/* ungated: force the relevant clocks on */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}
869 
870 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
871 {
872 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
873 
874 	if (enable)
875 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
876 	else
877 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
878 
879 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
880 }
881 
882 static int vce_v4_0_set_clockgating_state(void *handle,
883 					  enum amd_clockgating_state state)
884 {
885 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
886 	bool enable = (state == AMD_CG_STATE_GATE);
887 	int i;
888 
889 	if ((adev->asic_type == CHIP_POLARIS10) ||
890 		(adev->asic_type == CHIP_TONGA) ||
891 		(adev->asic_type == CHIP_FIJI))
892 		vce_v4_0_set_bypass_mode(adev, enable);
893 
894 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
895 		return 0;
896 
897 	mutex_lock(&adev->grbm_idx_mutex);
898 	for (i = 0; i < 2; i++) {
899 		/* Program VCE Instance 0 or 1 if not harvested */
900 		if (adev->vce.harvest_config & (1 << i))
901 			continue;
902 
903 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
904 
905 		if (enable) {
906 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
907 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
908 			data &= ~(0xf | 0xff0);
909 			data |= ((0x0 << 0) | (0x04 << 4));
910 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
911 
912 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
913 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
914 			data &= ~(0xf | 0xff0);
915 			data |= ((0x0 << 0) | (0x04 << 4));
916 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
917 		}
918 
919 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
920 	}
921 
922 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
923 	mutex_unlock(&adev->grbm_idx_mutex);
924 
925 	return 0;
926 }
927 #endif
928 
929 static int vce_v4_0_set_powergating_state(void *handle,
930 					  enum amd_powergating_state state)
931 {
932 	/* This doesn't actually powergate the VCE block.
933 	 * That's done in the dpm code via the SMC.  This
934 	 * just re-inits the block as necessary.  The actual
935 	 * gating still happens in the dpm code.  We should
936 	 * revisit this when there is a cleaner line between
937 	 * the smc and the hw blocks
938 	 */
939 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
940 
941 	if (state == AMD_PG_STATE_GATE)
942 		return vce_v4_0_stop(adev);
943 	else
944 		return vce_v4_0_start(adev);
945 }
946 
947 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
948 					struct amdgpu_ib *ib, uint32_t flags)
949 {
950 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
951 
952 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
953 	amdgpu_ring_write(ring, vmid);
954 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
955 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
956 	amdgpu_ring_write(ring, ib->length_dw);
957 }
958 
959 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
960 			u64 seq, unsigned flags)
961 {
962 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
963 
964 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
965 	amdgpu_ring_write(ring, addr);
966 	amdgpu_ring_write(ring, upper_32_bits(addr));
967 	amdgpu_ring_write(ring, seq);
968 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
969 }
970 
/* Terminate the command stream with an END packet. */
static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}
975 
/* Emit a REG_WAIT packet: stall the engine until (reg & mask) == val.
 * The register offset is converted to a byte address (<< 2); packet
 * operand order is fixed by the hardware: address, mask, value.
 */
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring,	reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}
984 
/* Emit a VM TLB flush on the VCE ring, then busy-wait until the hub's
 * per-VMID page-table base register reflects the new address.
 */
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	/* gmc helper emits the flush packets; returns the pd address it wrote */
	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	/* NOTE(review): 'vmid * 2' assumes two dwords (lo/hi) per VMID entry
	 * in the ctx0 PTB address register block — confirm against hub layout.
	 */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			       lower_32_bits(pd_addr), 0xffffffff);
}
996 
/* Emit a REG_WRITE packet: write @val to register @reg (dword offset,
 * converted to a byte address with << 2) from within the command stream.
 */
static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring,	reg << 2);
	amdgpu_ring_write(ring, val);
}
1004 
1005 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1006 					struct amdgpu_irq_src *source,
1007 					unsigned type,
1008 					enum amdgpu_interrupt_state state)
1009 {
1010 	uint32_t val = 0;
1011 
1012 	if (!amdgpu_sriov_vf(adev)) {
1013 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1014 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1015 
1016 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1017 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1018 	}
1019 	return 0;
1020 }
1021 
1022 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1023 				      struct amdgpu_irq_src *source,
1024 				      struct amdgpu_iv_entry *entry)
1025 {
1026 	DRM_DEBUG("IH: VCE\n");
1027 
1028 	switch (entry->src_data[0]) {
1029 	case 0:
1030 	case 1:
1031 	case 2:
1032 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1033 		break;
1034 	default:
1035 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1036 			  entry->src_id, entry->src_data[0]);
1037 		break;
1038 	}
1039 
1040 	return 0;
1041 }
1042 
/* amd_ip_funcs table for the VCE 4.0 IP block.  The idle/soft-reset
 * hooks are deliberately left NULL (unimplemented for this generation).
 */
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};
1062 
/* Ring callbacks for VCE 4.0 in VM mode.  emit_frame_size /
 * emit_ib_size count the dwords the corresponding emit_* helpers write,
 * so the ring layer can reserve space before emitting a frame.
 */
static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
1095 
1096 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1097 {
1098 	int i;
1099 
1100 	for (i = 0; i < adev->vce.num_rings; i++) {
1101 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1102 		adev->vce.ring[i].me = i;
1103 	}
1104 	DRM_INFO("VCE enabled in VM mode\n");
1105 }
1106 
/* Interrupt-source callbacks: enable/disable and IH dispatch for VCE. */
static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};
1111 
1112 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1113 {
1114 	adev->vce.irq.num_types = 1;
1115 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1116 };
1117 
/* IP block version descriptor exported to the SoC setup code
 * (VCE major 4, minor 0, rev 0).
 */
const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};
1126