/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

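/*
 * The helpers and macros below append MMSCH direct write / read-modify-write /
 * polling commands to the SR-IOV init table that the MM scheduler firmware
 * executes on behalf of the guest (see vce_v4_0_sriov_start()).
 */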
static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt,
					  uint32_t *init_table,
					  uint32_t reg_offset,
					  uint32_t value)
{
	direct_wt->cmd_header.reg_offset = reg_offset;
	direct_wt->reg_value = value;
	memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write));
}

static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
						 uint32_t *init_table,
						 uint32_t reg_offset,
						 uint32_t mask, uint32_t data)
{
	direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
	direct_rd_mod_wt->mask_value = mask;
	direct_rd_mod_wt->write_data = data;
	memcpy((void *)init_table, direct_rd_mod_wt,
	       sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
}

static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll,
					    uint32_t *init_table,
					    uint32_t reg_offset,
					    uint32_t mask, uint32_t wait)
{
	direct_poll->cmd_header.reg_offset = reg_offset;
	direct_poll->mask_value = mask;
	direct_poll->wait_value = wait;
	memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling));
}

#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
	mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
				      init_table, (reg), \
				      (mask), (data)); \
	init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
	table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
}

#define INSERT_DIRECT_WT(reg, value) { \
	mmsch_insert_direct_wt(&direct_wt, \
			       init_table, (reg), \
			       (value)); \
	init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
	table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
}

#define INSERT_DIRECT_POLL(reg, mask, wait) { \
	mmsch_insert_direct_poll(&direct_poll, \
				 init_table, (reg), \
				 (mask), (wait)); \
	init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
	table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
}

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring == &adev->vce.ring[0])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring == &adev->vce.ring[1])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

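/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS until the VCPU reports that the firmware is loaded,
 * soft-resetting the ECPU between retries.
 *
 * Returns 0 on success, -ETIMEDOUT if the firmware never responds.
 */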
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}

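/**
 * vce_v4_0_mmsch_start - kick off the MM scheduler (SR-IOV)
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table shared with the MMSCH firmware
 *
 * Program the MMSCH with the GPU address, VMID and size of the init table,
 * then ring the mailbox and wait for the scheduler to acknowledge it.
 *
 * Returns 0 on success, -EBUSY if the MMSCH does not respond.
 */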
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

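/**
 * vce_v4_0_sriov_start - start VCE block under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH init table (ring setup, MC resume sequence, VCPU start and
 * firmware status poll) and hand it to the MM scheduler instead of
 * programming the registers directly.
 *
 * Returns 0 on success, -EINVAL if the VCE table was already initialized.
 */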
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr));
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0);
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000);
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset += size;
		size = VCE_V4_0_STACK_SIZE;
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
				0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
				~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
				VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
				VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
				~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;

		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
	}

	return -EINVAL; /* already initialized? */
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

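/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU clock, hold the ECPU in soft reset and clear the
 * busy flag.
 */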
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

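/**
 * vce_v4_0_early_init - set up ring and irq callbacks
 *
 * @handle: amdgpu_device pointer
 *
 * Pick the number of VCE rings (one under SR-IOV, three otherwise) and
 * install the ring and interrupt function tables.
 */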
static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE ring 0 is supported under SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

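/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Register the VCE interrupt source, allocate the firmware/stack/data BO,
 * initialize the rings (doorbell based under SR-IOV) and, for SR-IOV,
 * allocate the MM table page shared with the MMSCH.
 */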
static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	}

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
			else if (i == 1)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM,
					    &adev->virt.mm_table.bo,
					    &adev->virt.mm_table.gpu_addr,
					    (void *)&adev->virt.mm_table.cpu_addr);
		if (!r) {
			memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
			printk("mm table gpu addr = 0x%llx, cpu addr = %p. \n",
			       adev->virt.mm_table.gpu_addr,
			       adev->virt.mm_table.cpu_addr);
		}
		return r;
	}

	return r;
}

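/**
 * vce_v4_0_sw_fini - software teardown
 *
 * @handle: amdgpu_device pointer
 *
 * Free the SR-IOV MM table, suspend VCE and tear down the common VCE
 * software state.
 */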
static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	if (amdgpu_sriov_vf(adev))
		amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
				      &adev->virt.mm_table.gpu_addr,
				      (void *)&adev->virt.mm_table.cpu_addr);

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vce_sw_fini(adev);
	if (r)
		return r;

	return r;
}

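/**
 * vce_v4_0_hw_init - start and test VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Start the block (via the MMSCH under SR-IOV) and run the ring test on
 * every VCE ring.
 */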
static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

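/**
 * vce_v4_0_hw_fini - stop VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the VCE block and mark all rings as not ready.
 */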
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	/* vce_v4_0_wait_for_idle(handle); */
	vce_v4_0_stop(adev);
	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	r = vce_v4_0_hw_init(adev);
	if (r)
		return r;

	return r;
}

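/**
 * vce_v4_0_mc_resume - program memory controller related registers
 *
 * @adev: amdgpu_device pointer
 *
 * Set up clock gating, LMI and VCPU cache registers so the VCPU can fetch
 * the firmware, stack and data regions from their GPU addresses.
 */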
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of the
	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 0x10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3-6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}


static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
#endif

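/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VM id the IB belongs to
 * @ctx_switch: unused by VCE
 *
 * Write commands to the ring to schedule the given IB inside a VM.
 */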
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

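/**
 * vce_v4_0_emit_vm_flush - flush the VM TLBs from the ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id to flush for
 * @pd_addr: physical address of the page directory
 *
 * Update the page table base in every VM hub and emit a TLB invalidation
 * request, waiting for the invalidation to complete.
 */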
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	unsigned eng = ring->idx;
	unsigned i;

	pd_addr = pd_addr | 0x1; /* valid bit */
	/* now only use physical base address of PDE and valid */
	BUG_ON(pd_addr & 0xFFFF00000000003EULL);

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
		uint32_t req = hub->get_invalidate_req(vm_id);

		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
		amdgpu_ring_write(ring,
			(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
		amdgpu_ring_write(ring, upper_32_bits(pd_addr));

		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
		amdgpu_ring_write(ring,
			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
		amdgpu_ring_write(ring, lower_32_bits(pd_addr));

		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
		amdgpu_ring_write(ring,
			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
		amdgpu_ring_write(ring, 0xffffffff);
		amdgpu_ring_write(ring, lower_32_bits(pd_addr));

		/* flush TLB */
		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
		amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
		amdgpu_ring_write(ring, req);

		/* wait for flush */
		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
		amdgpu_ring_write(ring, 1 << vm_id);
		amdgpu_ring_write(ring, 1 << vm_id);
	}
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

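/**
 * vce_v4_0_process_interrupt - handle a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledge the trap interrupt and process the fence of the ring that
 * raised it.
 */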
static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
			VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
			~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
};

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};