xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision a6ca5ac746d104019e76c29e69c2a1fc6dd2b29f)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15d.h"
32 #include "soc15_common.h"
33 #include "mmsch_v1_0.h"
34 
35 #include "vega10/soc15ip.h"
36 #include "vega10/VCE/vce_4_0_offset.h"
37 #include "vega10/VCE/vce_4_0_default.h"
38 #include "vega10/VCE/vce_4_0_sh_mask.h"
39 #include "vega10/MMHUB/mmhub_1_0_offset.h"
40 #include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
41 
42 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
43 
44 #define VCE_V4_0_FW_SIZE	(384 * 1024)
45 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
46 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
47 
48 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
49 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
50 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
51 
52 /**
53  * vce_v4_0_ring_get_rptr - get read pointer
54  *
55  * @ring: amdgpu_ring pointer
56  *
57  * Returns the current hardware read pointer
58  */
59 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
60 {
61 	struct amdgpu_device *adev = ring->adev;
62 
63 	if (ring == &adev->vce.ring[0])
64 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
65 	else if (ring == &adev->vce.ring[1])
66 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
67 	else
68 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
69 }
70 
71 /**
72  * vce_v4_0_ring_get_wptr - get write pointer
73  *
74  * @ring: amdgpu_ring pointer
75  *
76  * Returns the current hardware write pointer
77  */
78 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
79 {
80 	struct amdgpu_device *adev = ring->adev;
81 
82 	if (ring->use_doorbell)
83 		return adev->wb.wb[ring->wptr_offs];
84 
85 	if (ring == &adev->vce.ring[0])
86 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
87 	else if (ring == &adev->vce.ring[1])
88 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
89 	else
90 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
91 }
92 
93 /**
94  * vce_v4_0_ring_set_wptr - set write pointer
95  *
96  * @ring: amdgpu_ring pointer
97  *
98  * Commits the write pointer to the hardware
99  */
100 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
101 {
102 	struct amdgpu_device *adev = ring->adev;
103 
104 	if (ring->use_doorbell) {
105 		/* XXX check if swapping is necessary on BE */
106 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
107 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
108 		return;
109 	}
110 
111 	if (ring == &adev->vce.ring[0])
112 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
113 			lower_32_bits(ring->wptr));
114 	else if (ring == &adev->vce.ring[1])
115 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
116 			lower_32_bits(ring->wptr));
117 	else
118 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
119 			lower_32_bits(ring->wptr));
120 }
121 
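/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the firmware-loaded bit, soft-resetting the ECPU
 * between retry rounds.  Returns 0 on success or -ETIMEDOUT if the
 * firmware never reports itself as loaded.
 */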
122 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
123 {
124 	int i, j;
125 
126 	for (i = 0; i < 10; ++i) {
127 		for (j = 0; j < 100; ++j) {
128 			uint32_t status =
129 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
130 
131 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
132 				return 0;
133 			mdelay(10);
134 		}
135 
136 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
137 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
138 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
139 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
140 		mdelay(10);
141 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
142 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 		mdelay(10);
144 
145 	}
146 
147 	return -ETIMEDOUT;
148 }
149 
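/**
 * vce_v4_0_mmsch_start - hand the init descriptor table over to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table holding the init header
 *
 * Programs the MMSCH with the GPU address and size of the descriptor
 * table, kicks off initialization and waits for the mailbox response,
 * then clears ring 0's doorbell.  Returns 0 on success or -EBUSY if the
 * MMSCH does not respond.
 */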
150 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
151 				struct amdgpu_mm_table *table)
152 {
153 	uint32_t data = 0, loop;
154 	uint64_t addr = table->gpu_addr;
155 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
156 	uint32_t size;
157 
158 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
159 
160 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
161 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
162 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
163 
164 	/* 2, update vmid of descriptor */
165 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
166 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
167 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
168 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
169 
170 	/* 3, notify mmsch about the size of this descriptor */
171 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
172 
173 	/* 4, set resp to zero */
174 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
175 
176 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
177 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
178 
179 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
180 	loop = 1000;
181 	while ((data & 0x10000002) != 0x10000002) {
182 		udelay(10);
183 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
184 		loop--;
185 		if (!loop)
186 			break;
187 	}
188 
189 	if (!loop) {
190 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
191 		return -EBUSY;
192 	}
193 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
194 
195 	return 0;
196 }
197 
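/**
 * vce_v4_0_sriov_start - build the MMSCH init table and start VCE (SR-IOV)
 *
 * @adev: amdgpu_device pointer
 *
 * Fills the shared MM table with the register writes needed to set up
 * ring 0 and resume the memory controller, then asks the MMSCH to apply
 * it.  Returns -EINVAL if the VCE table was already initialized.
 */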
198 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
199 {
200 	struct amdgpu_ring *ring;
201 	uint32_t offset, size;
202 	uint32_t table_size = 0;
203 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
204 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
205 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
206 	struct mmsch_v1_0_cmd_end end = { { 0 } };
207 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
208 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
209 
210 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
211 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
212 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
213 	end.cmd_header.command_type = MMSCH_COMMAND__END;
214 
215 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
216 		header->version = MMSCH_VERSION;
217 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
218 
219 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
220 			header->vce_table_offset = header->header_size;
221 		else
222 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
223 
224 		init_table += header->vce_table_offset;
225 
226 		ring = &adev->vce.ring[0];
227 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
228 					    lower_32_bits(ring->gpu_addr));
229 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
230 					    upper_32_bits(ring->gpu_addr));
231 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
232 					    ring->ring_size / 4);
233 
234 		/* begin of MC_RESUME */
235 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
236 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
237 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
238 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
239 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
240 
241 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
242 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
243 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
244 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
245 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
246 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
247 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
248 		} else {
249 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
250 						adev->vce.gpu_addr >> 8);
251 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
252 						adev->vce.gpu_addr >> 8);
253 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
254 						adev->vce.gpu_addr >> 8);
255 		}
256 
257 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
258 		size = VCE_V4_0_FW_SIZE;
259 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
260 					    offset & 0x7FFFFFFF);
261 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
262 
263 		offset += size;
264 		size = VCE_V4_0_STACK_SIZE;
265 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
266 					    offset & 0x7FFFFFFF);
267 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
268 
269 		offset += size;
270 		size = VCE_V4_0_DATA_SIZE;
271 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
272 					    offset & 0x7FFFFFFF);
273 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
274 
275 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
276 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
277 						   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
278 
279 		/* end of MC_RESUME */
280 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
281 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
282 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
283 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
284 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
285 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
286 
287 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
288 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
289 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
290 
291 		/* clear BUSY flag */
292 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
293 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
294 
295 		/* add end packet */
296 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
297 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
298 		header->vce_table_size = table_size;
299 
300 		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
301 	}
302 
303 	return -EINVAL; /* already initialized? */
304 }
305 
306 /**
307  * vce_v4_0_start - start VCE block
308  *
309  * @adev: amdgpu_device pointer
310  *
311  * Setup and start the VCE block
312  */
313 static int vce_v4_0_start(struct amdgpu_device *adev)
314 {
315 	struct amdgpu_ring *ring;
316 	int r;
317 
318 	ring = &adev->vce.ring[0];
319 
320 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
321 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
322 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
323 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
324 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
325 
326 	ring = &adev->vce.ring[1];
327 
328 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
329 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
330 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
331 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
332 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
333 
334 	ring = &adev->vce.ring[2];
335 
336 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
337 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
338 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
339 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
340 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
341 
342 	vce_v4_0_mc_resume(adev);
343 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
344 			~VCE_STATUS__JOB_BUSY_MASK);
345 
346 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
347 
348 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
349 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
350 	mdelay(100);
351 
352 	r = vce_v4_0_firmware_loaded(adev);
353 
354 	/* clear BUSY flag */
355 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
356 
357 	if (r) {
358 		DRM_ERROR("VCE not responding, giving up!!!\n");
359 		return r;
360 	}
361 
362 	return 0;
363 }
364 
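/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU clock, holds the ECPU in soft reset and clears the
 * BUSY flag.
 */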
365 static int vce_v4_0_stop(struct amdgpu_device *adev)
366 {
367 
368 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
369 
370 	/* hold on ECPU */
371 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
372 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
373 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
374 
375 	/* clear BUSY flag */
376 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
377 
378 	/* Set Clock-Gating off */
379 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
380 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
381 	*/
382 
383 	return 0;
384 }
385 
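/**
 * vce_v4_0_early_init - set up ring and irq callbacks
 *
 * @handle: amdgpu_device pointer
 *
 * Picks the number of VCE rings (one under SR-IOV, three otherwise) and
 * installs the ring and interrupt function tables.
 */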
386 static int vce_v4_0_early_init(void *handle)
387 {
388 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
389 
390 	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
391 		adev->vce.num_rings = 1;
392 	else
393 		adev->vce.num_rings = 3;
394 
395 	vce_v4_0_set_ring_funcs(adev);
396 	vce_v4_0_set_irq_funcs(adev);
397 
398 	return 0;
399 }
400 
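/**
 * vce_v4_0_sw_init - sw init for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the VCE interrupt source, allocates the firmware/stack/data
 * BO, initializes the rings (doorbell based under SR-IOV) and allocates
 * the MM table used by the MMSCH.
 */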
401 static int vce_v4_0_sw_init(void *handle)
402 {
403 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
404 	struct amdgpu_ring *ring;
405 	unsigned size;
406 	int r, i;
407 
408 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
409 	if (r)
410 		return r;
411 
412 	size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
413 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
414 		size += VCE_V4_0_FW_SIZE;
415 
416 	r = amdgpu_vce_sw_init(adev, size);
417 	if (r)
418 		return r;
419 
420 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
421 		const struct common_firmware_header *hdr;
422 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
423 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
424 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
425 		adev->firmware.fw_size +=
426 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
427 		DRM_INFO("PSP loading VCE firmware\n");
428 	}
429 
430 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
431 		r = amdgpu_vce_resume(adev);
432 		if (r)
433 			return r;
434 	}
435 
436 	for (i = 0; i < adev->vce.num_rings; i++) {
437 		ring = &adev->vce.ring[i];
438 		sprintf(ring->name, "vce%d", i);
439 		if (amdgpu_sriov_vf(adev)) {
440 			/* DOORBELL only works under SRIOV */
441 			ring->use_doorbell = true;
442 			if (i == 0)
443 				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
444 			else if (i == 1)
445 				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
446 			else
447 				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
448 		}
449 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
450 		if (r)
451 			return r;
452 	}
453 
454 	r = amdgpu_virt_alloc_mm_table(adev);
455 	if (r)
456 		return r;
457 
458 	return r;
459 }
460 
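/**
 * vce_v4_0_sw_fini - sw fini for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Frees the MM table, suspends VCE and tears down the software state.
 */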
461 static int vce_v4_0_sw_fini(void *handle)
462 {
463 	int r;
464 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
465 
466 	/* free MM table */
467 	amdgpu_virt_free_mm_table(adev);
468 
469 	r = amdgpu_vce_suspend(adev);
470 	if (r)
471 		return r;
472 
473 	return amdgpu_vce_sw_fini(adev);
474 }
475 
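/**
 * vce_v4_0_hw_init - start and test the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Starts the VCE block (through the MMSCH under SR-IOV) and runs a ring
 * test on every enabled ring.
 */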
476 static int vce_v4_0_hw_init(void *handle)
477 {
478 	int r, i;
479 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
480 
481 	if (amdgpu_sriov_vf(adev))
482 		r = vce_v4_0_sriov_start(adev);
483 	else
484 		r = vce_v4_0_start(adev);
485 	if (r)
486 		return r;
487 
488 	for (i = 0; i < adev->vce.num_rings; i++)
489 		adev->vce.ring[i].ready = false;
490 
491 	for (i = 0; i < adev->vce.num_rings; i++) {
492 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
493 		if (r)
494 			return r;
495 		else
496 			adev->vce.ring[i].ready = true;
497 	}
498 
499 	DRM_INFO("VCE initialized successfully.\n");
500 
501 	return 0;
502 }
503 
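/**
 * vce_v4_0_hw_fini - stop the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Stops the hardware on bare metal; under SR-IOV the registers are left
 * untouched.  Marks all rings as not ready.
 */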
504 static int vce_v4_0_hw_fini(void *handle)
505 {
506 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
507 	int i;
508 
509 	if (!amdgpu_sriov_vf(adev)) {
510 		/* vce_v4_0_wait_for_idle(handle); */
511 		vce_v4_0_stop(adev);
512 	} else {
513 		/* full access mode, so don't touch any VCE register */
514 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
515 	}
516 
517 	for (i = 0; i < adev->vce.num_rings; i++)
518 		adev->vce.ring[i].ready = false;
519 
520 	return 0;
521 }
522 
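/**
 * vce_v4_0_suspend - suspend the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Tears down the hardware state and suspends the VCE software state.
 */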
523 static int vce_v4_0_suspend(void *handle)
524 {
525 	int r;
526 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
527 
528 	r = vce_v4_0_hw_fini(adev);
529 	if (r)
530 		return r;
531 
532 	return amdgpu_vce_suspend(adev);
533 }
534 
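/**
 * vce_v4_0_resume - resume the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Restores the VCE software state and re-initializes the hardware.
 */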
535 static int vce_v4_0_resume(void *handle)
536 {
537 	int r;
538 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
539 
540 	r = amdgpu_vce_resume(adev);
541 	if (r)
542 		return r;
543 
544 	return vce_v4_0_hw_init(adev);
545 }
546 
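/**
 * vce_v4_0_mc_resume - program the VCE memory controller
 *
 * @adev: amdgpu_device pointer
 *
 * Sets up clock gating and the LMI, points the VCPU cache windows at the
 * firmware, stack and data segments, and enables the system interrupt.
 */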
547 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
548 {
549 	uint32_t offset, size;
550 
551 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
552 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
553 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
554 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
555 
556 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
557 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
558 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
559 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
560 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
561 
562 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
563 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
564 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
565 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
566 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
567 	} else {
568 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
569 			(adev->vce.gpu_addr >> 8));
570 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
571 			(adev->vce.gpu_addr >> 40) & 0xff);
572 	}
573 
574 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
575 	size = VCE_V4_0_FW_SIZE;
576 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
577 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
578 
579 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
580 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
581 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
582 	size = VCE_V4_0_STACK_SIZE;
583 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
584 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
585 
586 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
587 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
588 	offset += size;
589 	size = VCE_V4_0_DATA_SIZE;
590 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
591 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
592 
593 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
594 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
595 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
596 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
597 }
598 
599 static int vce_v4_0_set_clockgating_state(void *handle,
600 					  enum amd_clockgating_state state)
601 {
602 	/* needed for driver unload */
603 	return 0;
604 }
605 
606 #if 0
607 static bool vce_v4_0_is_idle(void *handle)
608 {
609 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
610 	u32 mask = 0;
611 
612 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
613 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
614 
615 	return !(RREG32(mmSRBM_STATUS2) & mask);
616 }
617 
618 static int vce_v4_0_wait_for_idle(void *handle)
619 {
620 	unsigned i;
621 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
622 
623 	for (i = 0; i < adev->usec_timeout; i++)
624 		if (vce_v4_0_is_idle(handle))
625 			return 0;
626 
627 	return -ETIMEDOUT;
628 }
629 
630 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
631 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
632 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
633 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
634 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
635 
636 static bool vce_v4_0_check_soft_reset(void *handle)
637 {
638 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
639 	u32 srbm_soft_reset = 0;
640 
641 	/* According to the VCE team, we should use VCE_STATUS instead of the
642 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
643 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
644 	 * instance's registers are accessed
645 	 * (0 for the 1st instance, 0x10 for the 2nd instance).
646 	 *
647 	 * VCE_STATUS
648 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
649 	 * |----+----+-----------+----+----+----+----------+---------+----|
650 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
651 	 *
652 	 * The VCE team suggests using bit 3 to bit 6 for the busy status check.
653 	 */
654 	mutex_lock(&adev->grbm_idx_mutex);
655 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
656 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
657 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
658 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
659 	}
660 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
661 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
662 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
663 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
664 	}
665 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
666 	mutex_unlock(&adev->grbm_idx_mutex);
667 
668 	if (srbm_soft_reset) {
669 		adev->vce.srbm_soft_reset = srbm_soft_reset;
670 		return true;
671 	} else {
672 		adev->vce.srbm_soft_reset = 0;
673 		return false;
674 	}
675 }
676 
677 static int vce_v4_0_soft_reset(void *handle)
678 {
679 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
680 	u32 srbm_soft_reset;
681 
682 	if (!adev->vce.srbm_soft_reset)
683 		return 0;
684 	srbm_soft_reset = adev->vce.srbm_soft_reset;
685 
686 	if (srbm_soft_reset) {
687 		u32 tmp;
688 
689 		tmp = RREG32(mmSRBM_SOFT_RESET);
690 		tmp |= srbm_soft_reset;
691 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
692 		WREG32(mmSRBM_SOFT_RESET, tmp);
693 		tmp = RREG32(mmSRBM_SOFT_RESET);
694 
695 		udelay(50);
696 
697 		tmp &= ~srbm_soft_reset;
698 		WREG32(mmSRBM_SOFT_RESET, tmp);
699 		tmp = RREG32(mmSRBM_SOFT_RESET);
700 
701 		/* Wait a little for things to settle down */
702 		udelay(50);
703 	}
704 
705 	return 0;
706 }
707 
708 static int vce_v4_0_pre_soft_reset(void *handle)
709 {
710 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
711 
712 	if (!adev->vce.srbm_soft_reset)
713 		return 0;
714 
715 	mdelay(5);
716 
717 	return vce_v4_0_suspend(adev);
718 }
719 
720 
721 static int vce_v4_0_post_soft_reset(void *handle)
722 {
723 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
724 
725 	if (!adev->vce.srbm_soft_reset)
726 		return 0;
727 
728 	mdelay(5);
729 
730 	return vce_v4_0_resume(adev);
731 }
732 
733 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
734 {
735 	u32 tmp, data;
736 
737 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
738 	if (override)
739 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
740 	else
741 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
742 
743 	if (tmp != data)
744 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
745 }
746 
747 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
748 					     bool gated)
749 {
750 	u32 data;
751 
752 	/* Set Override to disable Clock Gating */
753 	vce_v4_0_override_vce_clock_gating(adev, true);
754 
755 	/* This function enables MGCG which is controlled by firmware.
756 	 * With the clocks in the gated state the core is still
757 	 * accessible but the firmware will throttle the clocks on the
758 	 * fly as necessary.
759 	 */
760 	if (gated) {
761 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
762 		data |= 0x1ff;
763 		data &= ~0xef0000;
764 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
765 
766 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
767 		data |= 0x3ff000;
768 		data &= ~0xffc00000;
769 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
770 
771 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
772 		data |= 0x2;
773 		data &= ~0x00010000;
774 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
775 
776 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
777 		data |= 0x37f;
778 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
779 
780 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
781 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
782 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
783 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
784 			0x8;
785 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
786 	} else {
787 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
788 		data &= ~0x80010;
789 		data |= 0xe70008;
790 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
791 
792 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
793 		data |= 0xffc00000;
794 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
795 
796 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
797 		data |= 0x10000;
798 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
799 
800 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
801 		data &= ~0xffc00000;
802 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
803 
804 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
805 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
806 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
807 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
808 			  0x8);
809 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
810 	}
811 	vce_v4_0_override_vce_clock_gating(adev, false);
812 }
813 
814 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
815 {
816 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
817 
818 	if (enable)
819 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
820 	else
821 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
822 
823 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
824 }
825 
826 static int vce_v4_0_set_clockgating_state(void *handle,
827 					  enum amd_clockgating_state state)
828 {
829 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
830 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
831 	int i;
832 
833 	if ((adev->asic_type == CHIP_POLARIS10) ||
834 		(adev->asic_type == CHIP_TONGA) ||
835 		(adev->asic_type == CHIP_FIJI))
836 		vce_v4_0_set_bypass_mode(adev, enable);
837 
838 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
839 		return 0;
840 
841 	mutex_lock(&adev->grbm_idx_mutex);
842 	for (i = 0; i < 2; i++) {
843 		/* Program VCE Instance 0 or 1 if not harvested */
844 		if (adev->vce.harvest_config & (1 << i))
845 			continue;
846 
847 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
848 
849 		if (enable) {
850 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
851 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
852 			data &= ~(0xf | 0xff0);
853 			data |= ((0x0 << 0) | (0x04 << 4));
854 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
855 
856 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
857 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
858 			data &= ~(0xf | 0xff0);
859 			data |= ((0x0 << 0) | (0x04 << 4));
860 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
861 		}
862 
863 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
864 	}
865 
866 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
867 	mutex_unlock(&adev->grbm_idx_mutex);
868 
869 	return 0;
870 }
871 
872 static int vce_v4_0_set_powergating_state(void *handle,
873 					  enum amd_powergating_state state)
874 {
875 	/* This doesn't actually powergate the VCE block.
876 	 * That's done in the dpm code via the SMC.  This
877 	 * just re-inits the block as necessary.  The actual
878 	 * gating still happens in the dpm code.  We should
879 	 * revisit this when there is a cleaner line between
880 	 * the smc and the hw blocks
881 	 */
882 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
883 
884 	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
885 		return 0;
886 
887 	if (state == AMD_PG_STATE_GATE)
888 		/* XXX do we need a vce_v4_0_stop()? */
889 		return 0;
890 	else
891 		return vce_v4_0_start(adev);
892 }
893 #endif
894 
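/**
 * vce_v4_0_ring_emit_ib - execute an indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: id of the VM the IB belongs to
 * @ctx_switch: unused
 *
 * Writes the VM variant of the execute-IB packet to the ring.
 */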
895 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
896 		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
897 {
898 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
899 	amdgpu_ring_write(ring, vm_id);
900 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
901 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
902 	amdgpu_ring_write(ring, ib->length_dw);
903 }
904 
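/**
 * vce_v4_0_ring_emit_fence - add a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: address the fence value is written to
 * @seq: sequence number to write
 * @flags: fence flags (64-bit fences are not supported)
 *
 * Emits a fence write followed by a trap command.
 */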
905 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
906 			u64 seq, unsigned flags)
907 {
908 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
909 
910 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
911 	amdgpu_ring_write(ring, addr);
912 	amdgpu_ring_write(ring, upper_32_bits(addr));
913 	amdgpu_ring_write(ring, seq);
914 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
915 }
916 
917 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
918 {
919 	amdgpu_ring_write(ring, VCE_CMD_END);
920 }
921 
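/**
 * vce_v4_0_emit_vm_flush - flush the VM for this ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id to flush
 * @pd_addr: page directory base address
 *
 * Updates the page table base for the given VM id on the ring's VM hub,
 * then requests a TLB invalidation on the ring's invalidation engine and
 * waits for the acknowledge bit.
 */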
922 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
923 			 unsigned int vm_id, uint64_t pd_addr)
924 {
925 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
926 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
927 	unsigned eng = ring->vm_inv_eng;
928 
929 	pd_addr = pd_addr | 0x1; /* valid bit */
930 	/* for now only the physical base address of the PDE and the valid bit are used */
931 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
932 
933 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
934 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
935 	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
936 
937 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
938 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
939 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
940 
941 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
942 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
943 	amdgpu_ring_write(ring, 0xffffffff);
944 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
945 
946 	/* flush TLB */
947 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
948 	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
949 	amdgpu_ring_write(ring, req);
950 
951 	/* wait for flush */
952 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
953 	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
954 	amdgpu_ring_write(ring, 1 << vm_id);
955 	amdgpu_ring_write(ring, 1 << vm_id);
956 }
957 
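/**
 * vce_v4_0_set_interrupt_state - enable or disable the VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type (unused)
 * @state: requested interrupt state
 *
 * Toggles the VCE system interrupt trap enable bit.
 */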
958 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
959 					struct amdgpu_irq_src *source,
960 					unsigned type,
961 					enum amdgpu_interrupt_state state)
962 {
963 	uint32_t val = 0;
964 
965 	if (state == AMDGPU_IRQ_STATE_ENABLE)
966 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
967 
968 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
969 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
970 	return 0;
971 }
972 
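/**
 * vce_v4_0_process_interrupt - handle a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledges the trap interrupt and processes the fence for the ring
 * identified by the source data.
 */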
973 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
974 				      struct amdgpu_irq_src *source,
975 				      struct amdgpu_iv_entry *entry)
976 {
977 	DRM_DEBUG("IH: VCE\n");
978 
979 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
980 			VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
981 			~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
982 
983 	switch (entry->src_data[0]) {
984 	case 0:
985 	case 1:
986 	case 2:
987 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
988 		break;
989 	default:
990 		DRM_ERROR("Unhandled interrupt: %d %d\n",
991 			  entry->src_id, entry->src_data[0]);
992 		break;
993 	}
994 
995 	return 0;
996 }
997 
998 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
999 	.name = "vce_v4_0",
1000 	.early_init = vce_v4_0_early_init,
1001 	.late_init = NULL,
1002 	.sw_init = vce_v4_0_sw_init,
1003 	.sw_fini = vce_v4_0_sw_fini,
1004 	.hw_init = vce_v4_0_hw_init,
1005 	.hw_fini = vce_v4_0_hw_fini,
1006 	.suspend = vce_v4_0_suspend,
1007 	.resume = vce_v4_0_resume,
1008 	.is_idle = NULL /* vce_v4_0_is_idle */,
1009 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1010 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1011 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1012 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1013 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1014 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1015 	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1016 };
1017 
1018 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1019 	.type = AMDGPU_RING_TYPE_VCE,
1020 	.align_mask = 0x3f,
1021 	.nop = VCE_CMD_NO_OP,
1022 	.support_64bit_ptrs = false,
1023 	.vmhub = AMDGPU_MMHUB,
1024 	.get_rptr = vce_v4_0_ring_get_rptr,
1025 	.get_wptr = vce_v4_0_ring_get_wptr,
1026 	.set_wptr = vce_v4_0_ring_set_wptr,
1027 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1028 	.emit_frame_size =
1029 		17 + /* vce_v4_0_emit_vm_flush */
1030 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1031 		1, /* vce_v4_0_ring_insert_end */
1032 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1033 	.emit_ib = vce_v4_0_ring_emit_ib,
1034 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1035 	.emit_fence = vce_v4_0_ring_emit_fence,
1036 	.test_ring = amdgpu_vce_ring_test_ring,
1037 	.test_ib = amdgpu_vce_ring_test_ib,
1038 	.insert_nop = amdgpu_ring_insert_nop,
1039 	.insert_end = vce_v4_0_ring_insert_end,
1040 	.pad_ib = amdgpu_ring_generic_pad_ib,
1041 	.begin_use = amdgpu_vce_ring_begin_use,
1042 	.end_use = amdgpu_vce_ring_end_use,
1043 };
1044 
1045 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1046 {
1047 	int i;
1048 
1049 	for (i = 0; i < adev->vce.num_rings; i++)
1050 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1051 	DRM_INFO("VCE enabled in VM mode\n");
1052 }
1053 
1054 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1055 	.set = vce_v4_0_set_interrupt_state,
1056 	.process = vce_v4_0_process_interrupt,
1057 };
1058 
1059 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1060 {
1061 	adev->vce.irq.num_types = 1;
1062 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1063 }
1064 
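/* IP block descriptor; the SOC15 code adds this block to the ASIC's IP
 * list when it assembles the Vega10 configuration (the registration call
 * lives outside this file).
 */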
1065 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1066 {
1067 	.type = AMD_IP_BLOCK_TYPE_VCE,
1068 	.major = 4,
1069 	.minor = 0,
1070 	.rev = 0,
1071 	.funcs = &vce_v4_0_ip_funcs,
1072 };
1073