xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c (revision 176f011b)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_uvd.h"
28 #include "soc15.h"
29 #include "soc15d.h"
30 #include "soc15_common.h"
31 #include "mmsch_v1_0.h"
32 
33 #include "uvd/uvd_7_0_offset.h"
34 #include "uvd/uvd_7_0_sh_mask.h"
35 #include "vce/vce_4_0_offset.h"
36 #include "vce/vce_4_0_default.h"
37 #include "vce/vce_4_0_sh_mask.h"
38 #include "nbif/nbif_6_1_offset.h"
39 #include "hdp/hdp_4_0_offset.h"
40 #include "mmhub/mmhub_1_0_offset.h"
41 #include "mmhub/mmhub_1_0_sh_mask.h"
42 #include "ivsrcid/uvd/irqsrcs_uvd_7_0.h"
43 
44 #define mmUVD_PG0_CC_UVD_HARVESTING                                                                    0x00c7
45 #define mmUVD_PG0_CC_UVD_HARVESTING_BASE_IDX                                                           1
46 //UVD_PG0_CC_UVD_HARVESTING
47 #define UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE__SHIFT                                                         0x1
48 #define UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE_MASK                                                           0x00000002L
49 
50 #define UVD7_MAX_HW_INSTANCES_VEGA20			2
51 
52 static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev);
53 static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev);
54 static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev);
55 static int uvd_v7_0_start(struct amdgpu_device *adev);
56 static void uvd_v7_0_stop(struct amdgpu_device *adev);
57 static int uvd_v7_0_sriov_start(struct amdgpu_device *adev);
58 
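/* IH client IDs used by each UVD instance (index 0 = UVD, index 1 = UVD1) */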
59 static int amdgpu_ih_clientid_uvds[] = {
60 	SOC15_IH_CLIENTID_UVD,
61 	SOC15_IH_CLIENTID_UVD1
62 };
63 
64 /**
65  * uvd_v7_0_ring_get_rptr - get read pointer
66  *
67  * @ring: amdgpu_ring pointer
68  *
69  * Returns the current hardware read pointer
70  */
71 static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
72 {
73 	struct amdgpu_device *adev = ring->adev;
74 
75 	return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
76 }
77 
78 /**
79  * uvd_v7_0_enc_ring_get_rptr - get enc read pointer
80  *
81  * @ring: amdgpu_ring pointer
82  *
83  * Returns the current hardware enc read pointer
84  */
85 static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
86 {
87 	struct amdgpu_device *adev = ring->adev;
88 
89 	if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
90 		return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
91 	else
92 		return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
93 }
94 
95 /**
96  * uvd_v7_0_ring_get_wptr - get write pointer
97  *
98  * @ring: amdgpu_ring pointer
99  *
100  * Returns the current hardware write pointer
101  */
102 static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
103 {
104 	struct amdgpu_device *adev = ring->adev;
105 
106 	return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
107 }
108 
109 /**
110  * uvd_v7_0_enc_ring_get_wptr - get enc write pointer
111  *
112  * @ring: amdgpu_ring pointer
113  *
114  * Returns the current hardware enc write pointer
115  */
116 static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
117 {
118 	struct amdgpu_device *adev = ring->adev;
119 
120 	if (ring->use_doorbell)
121 		return adev->wb.wb[ring->wptr_offs];
122 
123 	if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
124 		return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
125 	else
126 		return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
127 }
128 
129 /**
130  * uvd_v7_0_ring_set_wptr - set write pointer
131  *
132  * @ring: amdgpu_ring pointer
133  *
134  * Commits the write pointer to the hardware
135  */
136 static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
137 {
138 	struct amdgpu_device *adev = ring->adev;
139 
140 	WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
141 }
142 
143 /**
144  * uvd_v7_0_enc_ring_set_wptr - set enc write pointer
145  *
146  * @ring: amdgpu_ring pointer
147  *
148  * Commits the enc write pointer to the hardware
149  */
150 static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
151 {
152 	struct amdgpu_device *adev = ring->adev;
153 
154 	if (ring->use_doorbell) {
155 		/* XXX check if swapping is necessary on BE */
156 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
157 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
158 		return;
159 	}
160 
161 	if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
162 		WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR,
163 			lower_32_bits(ring->wptr));
164 	else
165 		WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2,
166 			lower_32_bits(ring->wptr));
167 }
168 
169 /**
170  * uvd_v7_0_enc_ring_test_ring - test if UVD ENC ring is working
171  *
172  * @ring: the engine to test on
173  *
174  */
175 static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
176 {
177 	struct amdgpu_device *adev = ring->adev;
178 	uint32_t rptr = amdgpu_ring_get_rptr(ring);
179 	unsigned i;
180 	int r;
181 
182 	if (amdgpu_sriov_vf(adev))
183 		return 0;
184 
185 	r = amdgpu_ring_alloc(ring, 16);
186 	if (r)
187 		return r;
188 	amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
189 	amdgpu_ring_commit(ring);
190 
191 	for (i = 0; i < adev->usec_timeout; i++) {
192 		if (amdgpu_ring_get_rptr(ring) != rptr)
193 			break;
194 		DRM_UDELAY(1);
195 	}
196 
197 	if (i >= adev->usec_timeout)
198 		r = -ETIMEDOUT;
199 
200 	return r;
201 }
202 
203 /**
204  * uvd_v7_0_enc_get_create_msg - generate a UVD ENC create msg
205  *
207  * @ring: ring we should submit the msg to
208  * @handle: session handle to use
209  * @fence: optional fence to return
210  *
211  * Open up a stream for HW test
212  */
213 static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
214 				       struct dma_fence **fence)
215 {
216 	const unsigned ib_size_dw = 16;
217 	struct amdgpu_job *job;
218 	struct amdgpu_ib *ib;
219 	struct dma_fence *f = NULL;
220 	uint64_t dummy;
221 	int i, r;
222 
223 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
224 	if (r)
225 		return r;
226 
227 	ib = &job->ibs[0];
228 	dummy = ib->gpu_addr + 1024;
229 
230 	ib->length_dw = 0;
231 	ib->ptr[ib->length_dw++] = 0x00000018;
232 	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
233 	ib->ptr[ib->length_dw++] = handle;
234 	ib->ptr[ib->length_dw++] = 0x00000000;
235 	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
236 	ib->ptr[ib->length_dw++] = dummy;
237 
238 	ib->ptr[ib->length_dw++] = 0x00000014;
239 	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
240 	ib->ptr[ib->length_dw++] = 0x0000001c;
241 	ib->ptr[ib->length_dw++] = 0x00000000;
242 	ib->ptr[ib->length_dw++] = 0x00000000;
243 
244 	ib->ptr[ib->length_dw++] = 0x00000008;
245 	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
246 
247 	for (i = ib->length_dw; i < ib_size_dw; ++i)
248 		ib->ptr[i] = 0x0;
249 
250 	r = amdgpu_job_submit_direct(job, ring, &f);
251 	if (r)
252 		goto err;
253 
254 	if (fence)
255 		*fence = dma_fence_get(f);
256 	dma_fence_put(f);
257 	return 0;
258 
259 err:
260 	amdgpu_job_free(job);
261 	return r;
262 }
263 
264 /**
265  * uvd_v7_0_enc_get_destroy_msg - generate a UVD ENC destroy msg
266  *
268  * @ring: ring we should submit the msg to
269  * @handle: session handle to use
270  * @fence: optional fence to return
271  *
272  * Close up a stream for HW test or if userspace failed to do so
273  */
274 static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
275 				struct dma_fence **fence)
276 {
277 	const unsigned ib_size_dw = 16;
278 	struct amdgpu_job *job;
279 	struct amdgpu_ib *ib;
280 	struct dma_fence *f = NULL;
281 	uint64_t dummy;
282 	int i, r;
283 
284 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
285 	if (r)
286 		return r;
287 
288 	ib = &job->ibs[0];
289 	dummy = ib->gpu_addr + 1024;
290 
291 	ib->length_dw = 0;
292 	ib->ptr[ib->length_dw++] = 0x00000018;
293 	ib->ptr[ib->length_dw++] = 0x00000001;
294 	ib->ptr[ib->length_dw++] = handle;
295 	ib->ptr[ib->length_dw++] = 0x00000000;
296 	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
297 	ib->ptr[ib->length_dw++] = dummy;
298 
299 	ib->ptr[ib->length_dw++] = 0x00000014;
300 	ib->ptr[ib->length_dw++] = 0x00000002;
301 	ib->ptr[ib->length_dw++] = 0x0000001c;
302 	ib->ptr[ib->length_dw++] = 0x00000000;
303 	ib->ptr[ib->length_dw++] = 0x00000000;
304 
305 	ib->ptr[ib->length_dw++] = 0x00000008;
306 	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
307 
308 	for (i = ib->length_dw; i < ib_size_dw; ++i)
309 		ib->ptr[i] = 0x0;
310 
311 	r = amdgpu_job_submit_direct(job, ring, &f);
312 	if (r)
313 		goto err;
314 
315 	if (fence)
316 		*fence = dma_fence_get(f);
317 	dma_fence_put(f);
318 	return 0;
319 
320 err:
321 	amdgpu_job_free(job);
322 	return r;
323 }
324 
325 /**
326  * uvd_v7_0_enc_ring_test_ib - test if UVD ENC IBs are working
327  *
328  * @ring: the engine to test on
 * @timeout: timeout in jiffies to wait for the IB test fence
329  *
330  */
331 static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
332 {
333 	struct dma_fence *fence = NULL;
334 	long r;
335 
336 	r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
337 	if (r)
338 		goto error;
339 
340 	r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence);
341 	if (r)
342 		goto error;
343 
344 	r = dma_fence_wait_timeout(fence, false, timeout);
345 	if (r == 0)
346 		r = -ETIMEDOUT;
347 	else if (r > 0)
348 		r = 0;
349 
350 error:
351 	dma_fence_put(fence);
352 	return r;
353 }
354 
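/**
 * uvd_v7_0_early_init - set instance count and function pointers
 *
 * @handle: handle used to pass amdgpu_device pointer
 *
 * On Vega20, read the harvesting fuses and disable the block if both
 * UVD instances are harvested; otherwise set the number of instances
 * and encode rings and install the ring and IRQ function pointers.
 */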
355 static int uvd_v7_0_early_init(void *handle)
356 {
357 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
358 
359 	if (adev->asic_type == CHIP_VEGA20) {
360 		u32 harvest;
361 		int i;
362 
363 		adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20;
364 		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
365 			harvest = RREG32_SOC15(UVD, i, mmUVD_PG0_CC_UVD_HARVESTING);
366 			if (harvest & UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE_MASK) {
367 				adev->uvd.harvest_config |= 1 << i;
368 			}
369 		}
370 		if (adev->uvd.harvest_config == (AMDGPU_UVD_HARVEST_UVD0 |
371 						 AMDGPU_UVD_HARVEST_UVD1))
372 			/* both instances are harvested, disable the block */
373 			return -ENOENT;
374 	} else {
375 		adev->uvd.num_uvd_inst = 1;
376 	}
377 
378 	if (amdgpu_sriov_vf(adev))
379 		adev->uvd.num_enc_rings = 1;
380 	else
381 		adev->uvd.num_enc_rings = 2;
382 	uvd_v7_0_set_ring_funcs(adev);
383 	uvd_v7_0_set_enc_ring_funcs(adev);
384 	uvd_v7_0_set_irq_funcs(adev);
385 
386 	return 0;
387 }
388 
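/**
 * uvd_v7_0_sw_init - software init of the UVD block
 *
 * @handle: handle used to pass amdgpu_device pointer
 *
 * Register the UVD and UVD ENC interrupt sources, set up firmware
 * loading (via PSP when enabled), initialize the decode and encode
 * rings and allocate the SRIOV MM table.
 */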
389 static int uvd_v7_0_sw_init(void *handle)
390 {
391 	struct amdgpu_ring *ring;
392 
393 	int i, j, r;
394 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
395 
396 	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
397 		if (adev->uvd.harvest_config & (1 << j))
398 			continue;
399 		/* UVD TRAP */
400 		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], UVD_7_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->uvd.inst[j].irq);
401 		if (r)
402 			return r;
403 
404 		/* UVD ENC TRAP */
405 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
406 			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + UVD_7_0__SRCID__UVD_ENC_GEN_PURP, &adev->uvd.inst[j].irq);
407 			if (r)
408 				return r;
409 		}
410 	}
411 
412 	r = amdgpu_uvd_sw_init(adev);
413 	if (r)
414 		return r;
415 
416 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
417 		const struct common_firmware_header *hdr;
418 		hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
419 		adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].ucode_id = AMDGPU_UCODE_ID_UVD;
420 		adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].fw = adev->uvd.fw;
421 		adev->firmware.fw_size +=
422 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
423 
424 		if (adev->uvd.num_uvd_inst == UVD7_MAX_HW_INSTANCES_VEGA20) {
425 			adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].ucode_id = AMDGPU_UCODE_ID_UVD1;
426 			adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].fw = adev->uvd.fw;
427 			adev->firmware.fw_size +=
428 				ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
429 		}
430 		DRM_INFO("PSP loading UVD firmware\n");
431 	}
432 
433 	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
434 		if (adev->uvd.harvest_config & (1 << j))
435 			continue;
436 		if (!amdgpu_sriov_vf(adev)) {
437 			ring = &adev->uvd.inst[j].ring;
438 			sprintf(ring->name, "uvd_%d", ring->me);
439 			r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
440 			if (r)
441 				return r;
442 		}
443 
444 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
445 			ring = &adev->uvd.inst[j].ring_enc[i];
446 			sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i);
447 			if (amdgpu_sriov_vf(adev)) {
448 				ring->use_doorbell = true;
449 
450 				/* currently only use the first encoding ring for
451 				 * sriov, so set an unused location for the other rings.
452 				 */
453 				if (i == 0)
454 					ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring0_1 * 2;
455 				else
456 					ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring2_3 * 2 + 1;
457 			}
458 			r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
459 			if (r)
460 				return r;
461 		}
462 	}
463 
464 	r = amdgpu_uvd_resume(adev);
465 	if (r)
466 		return r;
467 
468 	r = amdgpu_uvd_entity_init(adev);
469 	if (r)
470 		return r;
471 
472 	r = amdgpu_virt_alloc_mm_table(adev);
473 	if (r)
474 		return r;
475 
476 	return r;
477 }
478 
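/**
 * uvd_v7_0_sw_fini - software teardown of the UVD block
 *
 * @handle: handle used to pass amdgpu_device pointer
 *
 * Free the SRIOV MM table, suspend UVD and tear down the encode rings.
 */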
479 static int uvd_v7_0_sw_fini(void *handle)
480 {
481 	int i, j, r;
482 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
483 
484 	amdgpu_virt_free_mm_table(adev);
485 
486 	r = amdgpu_uvd_suspend(adev);
487 	if (r)
488 		return r;
489 
490 	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
491 		if (adev->uvd.harvest_config & (1 << j))
492 			continue;
493 		for (i = 0; i < adev->uvd.num_enc_rings; ++i)
494 			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
495 	}
496 	return amdgpu_uvd_sw_fini(adev);
497 }
498 
499 /**
500  * uvd_v7_0_hw_init - start and test UVD block
501  *
502  * @handle: handle used to pass amdgpu_device pointer
503  *
504  * Initialize the hardware, boot up the VCPU and do some testing
505  */
506 static int uvd_v7_0_hw_init(void *handle)
507 {
508 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
509 	struct amdgpu_ring *ring;
510 	uint32_t tmp;
511 	int i, j, r;
512 
513 	if (amdgpu_sriov_vf(adev))
514 		r = uvd_v7_0_sriov_start(adev);
515 	else
516 		r = uvd_v7_0_start(adev);
517 	if (r)
518 		goto done;
519 
520 	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
521 		if (adev->uvd.harvest_config & (1 << j))
522 			continue;
523 		ring = &adev->uvd.inst[j].ring;
524 
525 		if (!amdgpu_sriov_vf(adev)) {
526 			r = amdgpu_ring_test_helper(ring);
527 			if (r)
528 				goto done;
529 
530 			r = amdgpu_ring_alloc(ring, 10);
531 			if (r) {
532 				DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r);
533 				goto done;
534 			}
535 
536 			tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
537 				mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
538 			amdgpu_ring_write(ring, tmp);
539 			amdgpu_ring_write(ring, 0xFFFFF);
540 
541 			tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
542 				mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
543 			amdgpu_ring_write(ring, tmp);
544 			amdgpu_ring_write(ring, 0xFFFFF);
545 
546 			tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
547 				mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
548 			amdgpu_ring_write(ring, tmp);
549 			amdgpu_ring_write(ring, 0xFFFFF);
550 
551 			/* Clear timeout status bits */
552 			amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
553 				mmUVD_SEMA_TIMEOUT_STATUS), 0));
554 			amdgpu_ring_write(ring, 0x8);
555 
556 			amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
557 				mmUVD_SEMA_CNTL), 0));
558 			amdgpu_ring_write(ring, 3);
559 
560 			amdgpu_ring_commit(ring);
561 		}
562 
563 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
564 			ring = &adev->uvd.inst[j].ring_enc[i];
565 			r = amdgpu_ring_test_helper(ring);
566 			if (r)
567 				goto done;
568 		}
569 	}
570 done:
571 	if (!r)
572 		DRM_INFO("UVD and UVD ENC initialized successfully.\n");
573 
574 	return r;
575 }
576 
577 /**
578  * uvd_v7_0_hw_fini - stop the hardware block
579  *
580  * @handle: handle used to pass amdgpu_device pointer
581  *
582  * Stop the UVD block, mark ring as not ready any more
583  */
584 static int uvd_v7_0_hw_fini(void *handle)
585 {
586 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
587 	int i;
588 
589 	if (!amdgpu_sriov_vf(adev))
590 		uvd_v7_0_stop(adev);
591 	else {
592 		/* full access mode, so don't touch any UVD register */
593 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
594 	}
595 
596 	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
597 		if (adev->uvd.harvest_config & (1 << i))
598 			continue;
599 		adev->uvd.inst[i].ring.sched.ready = false;
600 	}
601 
602 	return 0;
603 }
604 
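/**
 * uvd_v7_0_suspend - stop UVD and save its state
 *
 * @handle: handle used to pass amdgpu_device pointer
 */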
605 static int uvd_v7_0_suspend(void *handle)
606 {
607 	int r;
608 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
609 
610 	r = uvd_v7_0_hw_fini(adev);
611 	if (r)
612 		return r;
613 
614 	return amdgpu_uvd_suspend(adev);
615 }
616 
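/**
 * uvd_v7_0_resume - restore UVD state and restart the hardware
 *
 * @handle: handle used to pass amdgpu_device pointer
 */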
617 static int uvd_v7_0_resume(void *handle)
618 {
619 	int r;
620 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
621 
622 	r = amdgpu_uvd_resume(adev);
623 	if (r)
624 		return r;
625 
626 	return uvd_v7_0_hw_init(adev);
627 }
628 
629 /**
630  * uvd_v7_0_mc_resume - memory controller programming
631  *
632  * @adev: amdgpu_device pointer
633  *
634  * Let the UVD memory controller know its offsets
635  */
636 static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
637 {
638 	uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
639 	uint32_t offset;
640 	int i;
641 
642 	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
643 		if (adev->uvd.harvest_config & (1 << i))
644 			continue;
645 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
646 			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
647 				i == 0 ?
648 				adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo:
649 				adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_lo);
650 			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
651 				i == 0 ?
652 				adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi:
653 				adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_hi);
654 			WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
655 			offset = 0;
656 		} else {
657 			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
658 				lower_32_bits(adev->uvd.inst[i].gpu_addr));
659 			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
660 				upper_32_bits(adev->uvd.inst[i].gpu_addr));
661 			offset = size;
662 			WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
663 					AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
664 		}
665 
666 		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
667 
668 		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
669 				lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
670 		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
671 				upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
672 		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
673 		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
674 
675 		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
676 				lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
677 		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
678 				upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
679 		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
680 		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2,
681 				AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
682 
683 		WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG,
684 				adev->gfx.config.gb_addr_config);
685 		WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG,
686 				adev->gfx.config.gb_addr_config);
687 		WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG,
688 				adev->gfx.config.gb_addr_config);
689 
690 		WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
691 	}
692 }
693 
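/**
 * uvd_v7_0_mmsch_start - hand the init table to the MMSCH (SRIOV)
 *
 * @adev: amdgpu_device pointer
 * @table: MM table containing the init descriptors
 *
 * Program the descriptor address, size and VMID, reset the encode ring
 * write pointers, kick off the MMSCH and wait for the mailbox response.
 */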
694 static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
695 				struct amdgpu_mm_table *table)
696 {
697 	uint32_t data = 0, loop;
698 	uint64_t addr = table->gpu_addr;
699 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
700 	uint32_t size;
701 	int i;
702 
703 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
704 
705 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
706 	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
707 	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
708 
709 	/* 2, update vmid of descriptor */
710 	data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID);
711 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
712 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
713 	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID, data);
714 
715 	/* 3, notify mmsch about the size of this descriptor */
716 	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE, size);
717 
718 	/* 4, set resp to zero */
719 	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);
720 
721 	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
722 		if (adev->uvd.harvest_config & (1 << i))
723 			continue;
724 		WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
725 		adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;
726 		adev->uvd.inst[i].ring_enc[0].wptr = 0;
727 		adev->uvd.inst[i].ring_enc[0].wptr_old = 0;
728 	}
729 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
730 	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001);
731 
732 	data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP);
733 	loop = 1000;
734 	while ((data & 0x10000002) != 0x10000002) {
735 		udelay(10);
736 		data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP);
737 		loop--;
738 		if (!loop)
739 			break;
740 	}
741 
742 	if (!loop) {
743 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
744 		return -EBUSY;
745 	}
746 
747 	return 0;
748 }
749 
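/**
 * uvd_v7_0_sriov_start - build the MMSCH init table and start UVD (SRIOV)
 *
 * @adev: amdgpu_device pointer
 *
 * Fill the shared MM table with the register writes the MMSCH performs
 * on behalf of the guest (memory controller setup, VCPU boot and ring
 * setup), then start the MMSCH with uvd_v7_0_mmsch_start().
 */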
750 static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
751 {
752 	struct amdgpu_ring *ring;
753 	uint32_t offset, size, tmp;
754 	uint32_t table_size = 0;
755 	struct mmsch_v1_0_cmd_direct_write direct_wt = { {0} };
756 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} };
757 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { {0} };
758 	struct mmsch_v1_0_cmd_end end = { {0} };
759 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
760 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
761 	uint8_t i = 0;
762 
763 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
764 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
765 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
766 	end.cmd_header.command_type = MMSCH_COMMAND__END;
767 
768 	if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) {
769 		header->version = MMSCH_VERSION;
770 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
771 
772 		if (header->vce_table_offset == 0 && header->vce_table_size == 0)
773 			header->uvd_table_offset = header->header_size;
774 		else
775 			header->uvd_table_offset = header->vce_table_size + header->vce_table_offset;
776 
777 		init_table += header->uvd_table_offset;
778 
779 		for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
780 			if (adev->uvd.harvest_config & (1 << i))
781 				continue;
782 			ring = &adev->uvd.inst[i].ring;
783 			ring->wptr = 0;
784 			size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
785 
786 			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
787 							   0xFFFFFFFF, 0x00000004);
788 			/* mc resume */
789 			if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
790 				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
791 							    lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
792 				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
793 							    upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
794 				offset = 0;
795 			} else {
796 				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
797 							    lower_32_bits(adev->uvd.inst[i].gpu_addr));
798 				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
799 							    upper_32_bits(adev->uvd.inst[i].gpu_addr));
800 				offset = size;
801 			}
802 
803 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
804 						    AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
805 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size);
806 
807 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
808 						    lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
809 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
810 						    upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
811 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
812 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
813 
814 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
815 						    lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
816 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
817 						    upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
818 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
819 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
820 						    AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
821 
822 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
823 			/* mc resume end */
824 
825 			/* disable clock gating */
826 			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL),
827 							   ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
828 
829 			/* disable interrupt */
830 			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
831 							   ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
832 
833 			/* stall UMC and register bus before resetting VCPU */
834 			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
835 							   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
836 							   UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
837 
838 			/* put LMI, VCPU, RBC etc... into reset */
839 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
840 						    (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
841 							       UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
842 							       UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
843 							       UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
844 							       UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
845 							       UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
846 							       UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
847 							       UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
848 
849 			/* initialize UVD memory controller */
850 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL),
851 						    (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
852 							       UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
853 							       UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
854 							       UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
855 							       UVD_LMI_CTRL__REQ_MODE_MASK |
856 							       0x00100000L));
857 
858 			/* take all subblocks out of reset, except VCPU */
859 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
860 						    UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
861 
862 			/* enable VCPU clock */
863 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
864 						    UVD_VCPU_CNTL__CLK_EN_MASK);
865 
866 			/* enable master interrupt */
867 			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
868 							   ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
869 							   (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
870 
871 			/* clear bit 4 of UVD_STATUS */
872 			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
873 							   ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
874 
875 			/* force RBC into idle state */
876 			size = order_base_2(ring->ring_size);
877 			tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
878 			tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
879 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
880 
881 			ring = &adev->uvd.inst[i].ring_enc[0];
882 			ring->wptr = 0;
883 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr);
884 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
885 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4);
886 
887 			/* boot up the VCPU */
888 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0);
889 
890 			/* enable UMC */
891 			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
892 											   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
893 
894 			MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02);
895 		}
896 		/* add end packet */
897 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
898 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
899 		header->uvd_table_size = table_size;
900 
901 	}
902 	return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table);
903 }
904 
905 /**
906  * uvd_v7_0_start - start UVD block
907  *
908  * @adev: amdgpu_device pointer
909  *
910  * Setup and start the UVD block
911  */
912 static int uvd_v7_0_start(struct amdgpu_device *adev)
913 {
914 	struct amdgpu_ring *ring;
915 	uint32_t rb_bufsz, tmp;
916 	uint32_t lmi_swap_cntl;
917 	uint32_t mp_swap_cntl;
918 	int i, j, k, r;
919 
920 	for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
921 		if (adev->uvd.harvest_config & (1 << k))
922 			continue;
923 		/* disable DPG */
924 		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0,
925 				~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
926 	}
927 
928 	/* disable byte swapping */
929 	lmi_swap_cntl = 0;
930 	mp_swap_cntl = 0;
931 
932 	uvd_v7_0_mc_resume(adev);
933 
934 	for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
935 		if (adev->uvd.harvest_config & (1 << k))
936 			continue;
937 		ring = &adev->uvd.inst[k].ring;
938 		/* disable clock gating */
939 		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0,
940 				~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
941 
942 		/* disable interrupt */
943 		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0,
944 				~UVD_MASTINT_EN__VCPU_EN_MASK);
945 
946 		/* stall UMC and register bus before resetting VCPU */
947 		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2),
948 				UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
949 				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
950 		mdelay(1);
951 
952 		/* put LMI, VCPU, RBC etc... into reset */
953 		WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
954 			UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
955 			UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
956 			UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
957 			UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
958 			UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
959 			UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
960 			UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
961 			UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
962 		mdelay(5);
963 
964 		/* initialize UVD memory controller */
965 		WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL,
966 			(0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
967 			UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
968 			UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
969 			UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
970 			UVD_LMI_CTRL__REQ_MODE_MASK |
971 			0x00100000L);
972 
973 #ifdef __BIG_ENDIAN
974 		/* swap (8 in 32) RB and IB */
975 		lmi_swap_cntl = 0xa;
976 		mp_swap_cntl = 0;
977 #endif
978 		WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
979 		WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
980 
981 		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040);
982 		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0);
983 		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040);
984 		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0);
985 		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0);
986 		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88);
987 
988 		/* take all subblocks out of reset, except VCPU */
989 		WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
990 				UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
991 		mdelay(5);
992 
993 		/* enable VCPU clock */
994 		WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL,
995 				UVD_VCPU_CNTL__CLK_EN_MASK);
996 
997 		/* enable UMC */
998 		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0,
999 				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1000 
1001 		/* boot up the VCPU */
1002 		WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0);
1003 		mdelay(10);
1004 
1005 		for (i = 0; i < 10; ++i) {
1006 			uint32_t status;
1007 
1008 			for (j = 0; j < 100; ++j) {
1009 				status = RREG32_SOC15(UVD, k, mmUVD_STATUS);
1010 				if (status & 2)
1011 					break;
1012 				mdelay(10);
1013 			}
1014 			r = 0;
1015 			if (status & 2)
1016 				break;
1017 
1018 			DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k);
1019 			WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET),
1020 					UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
1021 					~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
1022 			mdelay(10);
1023 			WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0,
1024 					~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
1025 			mdelay(10);
1026 			r = -1;
1027 		}
1028 
1029 		if (r) {
1030 			DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k);
1031 			return r;
1032 		}
1033 		/* enable master interrupt */
1034 		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN),
1035 			(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
1036 			~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
1037 
1038 		/* clear bit 4 of UVD_STATUS */
1039 		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0,
1040 				~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1041 
1042 		/* force RBC into idle state */
1043 		rb_bufsz = order_base_2(ring->ring_size);
1044 		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1045 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1046 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1047 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
1048 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1049 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1050 		WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp);
1051 
1052 		/* set the write pointer delay */
1053 		WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0);
1054 
1055 		/* set the wb address */
1056 		WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR,
1057 				(upper_32_bits(ring->gpu_addr) >> 2));
1058 
1059 		/* program the RB_BASE for the ring buffer */
1060 		WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
1061 				lower_32_bits(ring->gpu_addr));
1062 		WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
1063 				upper_32_bits(ring->gpu_addr));
1064 
1065 		/* Initialize the ring buffer's read and write pointers */
1066 		WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0);
1067 
1068 		ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR);
1069 		WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR,
1070 				lower_32_bits(ring->wptr));
1071 
1072 		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0,
1073 				~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
1074 
1075 		ring = &adev->uvd.inst[k].ring_enc[0];
1076 		WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1077 		WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1078 		WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr);
1079 		WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1080 		WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4);
1081 
1082 		ring = &adev->uvd.inst[k].ring_enc[1];
1083 		WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1084 		WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1085 		WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1086 		WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1087 		WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4);
1088 	}
1089 	return 0;
1090 }
1091 
1092 /**
1093  * uvd_v7_0_stop - stop UVD block
1094  *
1095  * @adev: amdgpu_device pointer
1096  *
1097  * stop the UVD block
1098  */
1099 static void uvd_v7_0_stop(struct amdgpu_device *adev)
1100 {
1101 	uint8_t i = 0;
1102 
1103 	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
1104 		if (adev->uvd.harvest_config & (1 << i))
1105 			continue;
1106 		/* force RBC into idle state */
1107 		WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101);
1108 
1109 		/* Stall UMC and register bus before resetting VCPU */
1110 		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
1111 				UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
1112 				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1113 		mdelay(1);
1114 
1115 		/* put VCPU into reset */
1116 		WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET,
1117 				UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
1118 		mdelay(5);
1119 
1120 		/* disable VCPU clock */
1121 		WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0);
1122 
1123 		/* Unstall UMC and register bus */
1124 		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
1125 				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1126 	}
1127 }
1128 
1129 /**
1130  * uvd_v7_0_ring_emit_fence - emit a fence & trap command
1131  *
1132  * @ring: amdgpu_ring pointer
1133  * @addr: address the fence sequence number is written to
 * @seq: sequence number to write
 * @flags: fence related flags
1134  *
1135  * Write a fence and a trap command to the ring.
1136  */
1137 static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
1138 				     unsigned flags)
1139 {
1140 	struct amdgpu_device *adev = ring->adev;
1141 
1142 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
1143 
1144 	amdgpu_ring_write(ring,
1145 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
1146 	amdgpu_ring_write(ring, seq);
1147 	amdgpu_ring_write(ring,
1148 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
1149 	amdgpu_ring_write(ring, addr & 0xffffffff);
1150 	amdgpu_ring_write(ring,
1151 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
1152 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
1153 	amdgpu_ring_write(ring,
1154 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
1155 	amdgpu_ring_write(ring, 0);
1156 
1157 	amdgpu_ring_write(ring,
1158 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
1159 	amdgpu_ring_write(ring, 0);
1160 	amdgpu_ring_write(ring,
1161 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
1162 	amdgpu_ring_write(ring, 0);
1163 	amdgpu_ring_write(ring,
1164 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
1165 	amdgpu_ring_write(ring, 2);
1166 }
1167 
1168 /**
1169  * uvd_v7_0_enc_ring_emit_fence - emit an enc fence & trap command
1170  *
1171  * @ring: amdgpu_ring pointer
1172  * @fence: fence to emit
1173  *
1174  * Write enc a fence and a trap command to the ring.
1175  */
1176 static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
1177 			u64 seq, unsigned flags)
1178 {
1179 
1180 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
1181 
1182 	amdgpu_ring_write(ring, HEVC_ENC_CMD_FENCE);
1183 	amdgpu_ring_write(ring, addr);
1184 	amdgpu_ring_write(ring, upper_32_bits(addr));
1185 	amdgpu_ring_write(ring, seq);
1186 	amdgpu_ring_write(ring, HEVC_ENC_CMD_TRAP);
1187 }
1188 
1189 /**
1190  * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing
1191  *
1192  * @ring: amdgpu_ring pointer
1193  */
1194 static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
1195 {
1196 	/* The firmware doesn't seem to like touching registers at this point. */
1197 }
1198 
1199 /**
1200  * uvd_v7_0_ring_test_ring - register write test
1201  *
1202  * @ring: amdgpu_ring pointer
1203  *
1204  * Test if we can successfully write to the context register
1205  */
1206 static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
1207 {
1208 	struct amdgpu_device *adev = ring->adev;
1209 	uint32_t tmp = 0;
1210 	unsigned i;
1211 	int r;
1212 
1213 	WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
1214 	r = amdgpu_ring_alloc(ring, 3);
1215 	if (r)
1216 		return r;
1217 
1218 	amdgpu_ring_write(ring,
1219 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
1220 	amdgpu_ring_write(ring, 0xDEADBEEF);
1221 	amdgpu_ring_commit(ring);
1222 	for (i = 0; i < adev->usec_timeout; i++) {
1223 		tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID);
1224 		if (tmp == 0xDEADBEEF)
1225 			break;
1226 		DRM_UDELAY(1);
1227 	}
1228 
1229 	if (i >= adev->usec_timeout)
1230 		r = -ETIMEDOUT;
1231 
1232 	return r;
1233 }
1234 
1235 /**
1236  * uvd_v7_0_ring_patch_cs_in_place - Patch the IB for command submission.
1237  *
1238  * @p: the CS parser with the IBs
1239  * @ib_idx: which IB to patch
1240  *
1241  */
1242 static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
1243 					   uint32_t ib_idx)
1244 {
1245 	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
1246 	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
1247 	unsigned i;
1248 
1249 	/* No patching necessary for the first instance */
1250 	if (!ring->me)
1251 		return 0;
1252 
1253 	for (i = 0; i < ib->length_dw; i += 2) {
1254 		uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i);
1255 
1256 		reg -= p->adev->reg_offset[UVD_HWIP][0][1];
1257 		reg += p->adev->reg_offset[UVD_HWIP][1][1];
1258 
1259 		amdgpu_set_ib_value(p, ib_idx, i, reg);
1260 	}
1261 	return 0;
1262 }
1263 
1264 /**
1265  * uvd_v7_0_ring_emit_ib - execute indirect buffer
1266  *
1267  * @ring: amdgpu_ring pointer
1268  * @ib: indirect buffer to execute
1269  *
1270  * Write ring commands to execute the indirect buffer
1271  */
1272 static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
1273 				  struct amdgpu_job *job,
1274 				  struct amdgpu_ib *ib,
1275 				  bool ctx_switch)
1276 {
1277 	struct amdgpu_device *adev = ring->adev;
1278 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
1279 
1280 	amdgpu_ring_write(ring,
1281 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
1282 	amdgpu_ring_write(ring, vmid);
1283 
1284 	amdgpu_ring_write(ring,
1285 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
1286 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
1287 	amdgpu_ring_write(ring,
1288 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
1289 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
1290 	amdgpu_ring_write(ring,
1291 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0));
1292 	amdgpu_ring_write(ring, ib->length_dw);
1293 }
1294 
1295 /**
1296  * uvd_v7_0_enc_ring_emit_ib - enc execute indirect buffer
1297  *
1298  * @ring: amdgpu_ring pointer
 * @job: job to retrieve the VMID from
1299  * @ib: indirect buffer to execute
 * @ctx_switch: unused
1300  *
1301  * Write enc ring commands to execute the indirect buffer
1302  */
1303 static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
1304 					struct amdgpu_job *job,
1305 					struct amdgpu_ib *ib,
1306 					bool ctx_switch)
1307 {
1308 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
1309 
1310 	amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
1311 	amdgpu_ring_write(ring, vmid);
1312 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
1313 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
1314 	amdgpu_ring_write(ring, ib->length_dw);
1315 }
1316 
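/**
 * uvd_v7_0_ring_emit_wreg - emit a register write on the UVD ring
 *
 * @ring: amdgpu_ring pointer
 * @reg: register dword offset to write
 * @val: value to write
 */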
1317 static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
1318 				    uint32_t reg, uint32_t val)
1319 {
1320 	struct amdgpu_device *adev = ring->adev;
1321 
1322 	amdgpu_ring_write(ring,
1323 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
1324 	amdgpu_ring_write(ring, reg << 2);
1325 	amdgpu_ring_write(ring,
1326 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
1327 	amdgpu_ring_write(ring, val);
1328 	amdgpu_ring_write(ring,
1329 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
1330 	amdgpu_ring_write(ring, 8);
1331 }
1332 
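/**
 * uvd_v7_0_ring_emit_reg_wait - emit a register wait on the UVD ring
 *
 * @ring: amdgpu_ring pointer
 * @reg: register dword offset to poll
 * @val: value to wait for
 * @mask: mask applied to the register before comparing
 */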
1333 static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1334 					uint32_t val, uint32_t mask)
1335 {
1336 	struct amdgpu_device *adev = ring->adev;
1337 
1338 	amdgpu_ring_write(ring,
1339 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
1340 	amdgpu_ring_write(ring, reg << 2);
1341 	amdgpu_ring_write(ring,
1342 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
1343 	amdgpu_ring_write(ring, val);
1344 	amdgpu_ring_write(ring,
1345 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0));
1346 	amdgpu_ring_write(ring, mask);
1347 	amdgpu_ring_write(ring,
1348 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
1349 	amdgpu_ring_write(ring, 12);
1350 }
1351 
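/**
 * uvd_v7_0_ring_emit_vm_flush - emit a VM page table flush
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM ID to flush
 * @pd_addr: page directory base address
 *
 * Emit the GMC TLB flush and wait for the page table base register of
 * the VM hub to reflect the new address.
 */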
1352 static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1353 					unsigned vmid, uint64_t pd_addr)
1354 {
1355 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1356 	uint32_t data0, data1, mask;
1357 
1358 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1359 
1360 	/* wait for reg writes */
1361 	data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
1362 	data1 = lower_32_bits(pd_addr);
1363 	mask = 0xffffffff;
1364 	uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
1365 }
1366 
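/**
 * uvd_v7_0_ring_insert_nop - pad the ring with NOPs
 *
 * @ring: amdgpu_ring pointer
 * @count: number of dwords to pad; must be even, as each NOP is a
 *	   two-dword write to UVD_NO_OP
 */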
1367 static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
1368 {
1369 	struct amdgpu_device *adev = ring->adev;
1370 	int i;
1371 
1372 	WARN_ON(ring->wptr % 2 || count % 2);
1373 
1374 	for (i = 0; i < count / 2; i++) {
1375 		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0));
1376 		amdgpu_ring_write(ring, 0);
1377 	}
1378 }
1379 
1380 static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
1381 {
1382 	amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
1383 }
1384 
1385 static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
1386 					    uint32_t reg, uint32_t val,
1387 					    uint32_t mask)
1388 {
1389 	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
1390 	amdgpu_ring_write(ring,	reg << 2);
1391 	amdgpu_ring_write(ring, mask);
1392 	amdgpu_ring_write(ring, val);
1393 }
1394 
1395 static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
1396 					    unsigned int vmid, uint64_t pd_addr)
1397 {
1398 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1399 
1400 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1401 
1402 	/* wait for reg writes */
1403 	uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
1404 					lower_32_bits(pd_addr), 0xffffffff);
1405 }
1406 
1407 static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
1408 					uint32_t reg, uint32_t val)
1409 {
1410 	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
1411 	amdgpu_ring_write(ring,	reg << 2);
1412 	amdgpu_ring_write(ring, val);
1413 }
1414 
1415 #if 0
1416 static bool uvd_v7_0_is_idle(void *handle)
1417 {
1418 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1419 
1420 	return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
1421 }
1422 
1423 static int uvd_v7_0_wait_for_idle(void *handle)
1424 {
1425 	unsigned i;
1426 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1427 
1428 	for (i = 0; i < adev->usec_timeout; i++) {
1429 		if (uvd_v7_0_is_idle(handle))
1430 			return 0;
1431 	}
1432 	return -ETIMEDOUT;
1433 }
1434 
1435 #define AMDGPU_UVD_STATUS_BUSY_MASK    0xfd
1436 static bool uvd_v7_0_check_soft_reset(void *handle)
1437 {
1438 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1439 	u32 srbm_soft_reset = 0;
1440 	u32 tmp = RREG32(mmSRBM_STATUS);
1441 
1442 	if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
1443 	    REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
1444 	    (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
1445 		    AMDGPU_UVD_STATUS_BUSY_MASK))
1446 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
1447 				SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
1448 
1449 	if (srbm_soft_reset) {
1450 		adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
1451 		return true;
1452 	} else {
1453 		adev->uvd.inst[ring->me].srbm_soft_reset = 0;
1454 		return false;
1455 	}
1456 }
1457 
1458 static int uvd_v7_0_pre_soft_reset(void *handle)
1459 {
1460 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1461 
1462 	if (!adev->uvd.inst[ring->me].srbm_soft_reset)
1463 		return 0;
1464 
1465 	uvd_v7_0_stop(adev);
1466 	return 0;
1467 }
1468 
1469 static int uvd_v7_0_soft_reset(void *handle)
1470 {
1471 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1472 	u32 srbm_soft_reset;
1473 
1474 	if (!adev->uvd.inst[ring->me].srbm_soft_reset)
1475 		return 0;
1476 	srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
1477 
1478 	if (srbm_soft_reset) {
1479 		u32 tmp;
1480 
1481 		tmp = RREG32(mmSRBM_SOFT_RESET);
1482 		tmp |= srbm_soft_reset;
1483 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
1484 		WREG32(mmSRBM_SOFT_RESET, tmp);
1485 		tmp = RREG32(mmSRBM_SOFT_RESET);
1486 
1487 		udelay(50);
1488 
1489 		tmp &= ~srbm_soft_reset;
1490 		WREG32(mmSRBM_SOFT_RESET, tmp);
1491 		tmp = RREG32(mmSRBM_SOFT_RESET);
1492 
1493 		/* Wait a little for things to settle down */
1494 		udelay(50);
1495 	}
1496 
1497 	return 0;
1498 }
1499 
1500 static int uvd_v7_0_post_soft_reset(void *handle)
1501 {
1502 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1503 
1504 	if (!adev->uvd.inst[ring->me].srbm_soft_reset)
1505 		return 0;
1506 
1507 	mdelay(5);
1508 
1509 	return uvd_v7_0_start(adev);
1510 }
1511 #endif
1512 
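/* Enabling/disabling the UVD interrupt sources is not implemented yet. */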
1513 static int uvd_v7_0_set_interrupt_state(struct amdgpu_device *adev,
1514 					struct amdgpu_irq_src *source,
1515 					unsigned type,
1516 					enum amdgpu_interrupt_state state)
1517 {
1518 	// TODO
1519 	return 0;
1520 }
1521 
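/**
 * uvd_v7_0_process_interrupt - handle a UVD interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: decoded interrupt vector entry
 *
 * Map the IH client ID to the UVD instance and signal fence processing
 * on the decode ring (src_id 124) or the encode rings (119 and 120).
 */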
1522 static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
1523 				      struct amdgpu_irq_src *source,
1524 				      struct amdgpu_iv_entry *entry)
1525 {
1526 	uint32_t ip_instance;
1527 
1528 	switch (entry->client_id) {
1529 	case SOC15_IH_CLIENTID_UVD:
1530 		ip_instance = 0;
1531 		break;
1532 	case SOC15_IH_CLIENTID_UVD1:
1533 		ip_instance = 1;
1534 		break;
1535 	default:
1536 		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
1537 		return 0;
1538 	}
1539 
1540 	DRM_DEBUG("IH: UVD TRAP\n");
1541 
1542 	switch (entry->src_id) {
1543 	case 124:
1544 		amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring);
1545 		break;
1546 	case 119:
1547 		amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]);
1548 		break;
1549 	case 120:
1550 		if (!amdgpu_sriov_vf(adev))
1551 			amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]);
1552 		break;
1553 	default:
1554 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1555 			  entry->src_id, entry->src_data[0]);
1556 		break;
1557 	}
1558 
1559 	return 0;
1560 }
1561 
1562 #if 0
1563 static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
1564 {
1565 	uint32_t data, data1, data2, suvd_flags;
1566 
1567 	data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
1568 	data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
1569 	data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
1570 
1571 	data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
1572 		  UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
1573 
1574 	suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
1575 		     UVD_SUVD_CGC_GATE__SIT_MASK |
1576 		     UVD_SUVD_CGC_GATE__SMP_MASK |
1577 		     UVD_SUVD_CGC_GATE__SCM_MASK |
1578 		     UVD_SUVD_CGC_GATE__SDB_MASK;
1579 
1580 	data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
1581 		(1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) |
1582 		(4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY));
1583 
1584 	data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
1585 			UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
1586 			UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
1587 			UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
1588 			UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
1589 			UVD_CGC_CTRL__SYS_MODE_MASK |
1590 			UVD_CGC_CTRL__UDEC_MODE_MASK |
1591 			UVD_CGC_CTRL__MPEG2_MODE_MASK |
1592 			UVD_CGC_CTRL__REGS_MODE_MASK |
1593 			UVD_CGC_CTRL__RBC_MODE_MASK |
1594 			UVD_CGC_CTRL__LMI_MC_MODE_MASK |
1595 			UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
1596 			UVD_CGC_CTRL__IDCT_MODE_MASK |
1597 			UVD_CGC_CTRL__MPRD_MODE_MASK |
1598 			UVD_CGC_CTRL__MPC_MODE_MASK |
1599 			UVD_CGC_CTRL__LBSI_MODE_MASK |
1600 			UVD_CGC_CTRL__LRBBM_MODE_MASK |
1601 			UVD_CGC_CTRL__WCB_MODE_MASK |
1602 			UVD_CGC_CTRL__VCPU_MODE_MASK |
1603 			UVD_CGC_CTRL__JPEG_MODE_MASK |
1604 			UVD_CGC_CTRL__JPEG2_MODE_MASK |
1605 			UVD_CGC_CTRL__SCPU_MODE_MASK);
1606 	data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
1607 			UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
1608 			UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
1609 			UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
1610 			UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
1611 	data1 |= suvd_flags;
1612 
1613 	WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
1614 	WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
1615 	WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
1616 	WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
1617 }
1618 
1619 static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
1620 {
1621 	uint32_t data, data1, cgc_flags, suvd_flags;
1622 
1623 	data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
1624 	data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
1625 
1626 	cgc_flags = UVD_CGC_GATE__SYS_MASK |
1627 		UVD_CGC_GATE__UDEC_MASK |
1628 		UVD_CGC_GATE__MPEG2_MASK |
1629 		UVD_CGC_GATE__RBC_MASK |
1630 		UVD_CGC_GATE__LMI_MC_MASK |
1631 		UVD_CGC_GATE__IDCT_MASK |
1632 		UVD_CGC_GATE__MPRD_MASK |
1633 		UVD_CGC_GATE__MPC_MASK |
1634 		UVD_CGC_GATE__LBSI_MASK |
1635 		UVD_CGC_GATE__LRBBM_MASK |
1636 		UVD_CGC_GATE__UDEC_RE_MASK |
1637 		UVD_CGC_GATE__UDEC_CM_MASK |
1638 		UVD_CGC_GATE__UDEC_IT_MASK |
1639 		UVD_CGC_GATE__UDEC_DB_MASK |
1640 		UVD_CGC_GATE__UDEC_MP_MASK |
1641 		UVD_CGC_GATE__WCB_MASK |
1642 		UVD_CGC_GATE__VCPU_MASK |
1643 		UVD_CGC_GATE__SCPU_MASK |
1644 		UVD_CGC_GATE__JPEG_MASK |
1645 		UVD_CGC_GATE__JPEG2_MASK;
1646 
1647 	suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
1648 				UVD_SUVD_CGC_GATE__SIT_MASK |
1649 				UVD_SUVD_CGC_GATE__SMP_MASK |
1650 				UVD_SUVD_CGC_GATE__SCM_MASK |
1651 				UVD_SUVD_CGC_GATE__SDB_MASK;
1652 
1653 	data |= cgc_flags;
1654 	data1 |= suvd_flags;
1655 
1656 	WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
1657 	WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
1658 }
1659 
1660 static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
1661 {
1662 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
1663 
1664 	if (enable)
1665 		tmp |= (GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
1666 			GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
1667 	else
1668 		tmp &= ~(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
1669 			 GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
1670 
1671 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
1672 }
1673 
1674 
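/*
 * Full clock gating implementation (compiled out together with the
 * powergating helper below): program DCLK/VCLK bypass and, when UVD
 * MGCG is supported, enable SW clock gating on gate requests or wait
 * for the block to idle on ungate requests.
 */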
1675 static int uvd_v7_0_set_clockgating_state(void *handle,
1676 					  enum amd_clockgating_state state)
1677 {
1678 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1679 	bool enable = (state == AMD_CG_STATE_GATE);
1680 
1681 	uvd_v7_0_set_bypass_mode(adev, enable);
1682 
1683 	if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
1684 		return 0;
1685 
1686 	if (enable) {
1687 		/* disable HW gating and enable SW gating */
1688 		uvd_v7_0_set_sw_clock_gating(adev);
1689 	} else {
1690 		/* wait for STATUS to clear */
1691 		if (uvd_v7_0_wait_for_idle(handle))
1692 			return -EBUSY;
1693 
1694 		/* enable HW gates because UVD is idle */
1695 		/* uvd_v7_0_set_hw_clock_gating(adev); */
1696 	}
1697 
1698 	return 0;
1699 }
1700 
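/**
 * uvd_v7_0_set_powergating_state - set UVD power gating state
 *
 * @handle: amdgpu_device pointer
 * @state: power gating state to program
 *
 * Stop or restart the UVD block depending on the requested state.
 */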
1701 static int uvd_v7_0_set_powergating_state(void *handle,
1702 					  enum amd_powergating_state state)
1703 {
1704 	/* This doesn't actually powergate the UVD block.
1705 	 * That's done in the DPM code via the SMC.  This
1706 	 * just re-inits the block as necessary.  The actual
1707 	 * gating still happens in the DPM code.  We should
1708 	 * revisit this when there is a cleaner line between
1709 	 * the SMC and the HW blocks.
1710 	 */
1711 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1712 
1713 	if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
1714 		return 0;
1715 
1716 	WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
1717 
1718 	if (state == AMD_PG_STATE_GATE) {
1719 		uvd_v7_0_stop(adev);
1720 		return 0;
1721 	}
1722 
1723 	return uvd_v7_0_start(adev);
1724 }
1725 #endif
1726 
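/**
 * uvd_v7_0_set_clockgating_state - set UVD clock gating state
 *
 * @handle: amdgpu_device pointer
 * @state: clock gating state to program
 *
 * Stub kept so the IP block can be torn down cleanly on driver unload.
 */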
1727 static int uvd_v7_0_set_clockgating_state(void *handle,
1728 					  enum amd_clockgating_state state)
1729 {
1730 	/* needed for driver unload */
1731 	return 0;
1732 }
1733 
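/*
 * IP block callbacks; the idle, soft reset and powergating helpers are
 * currently not wired up, hence the NULL entries below.
 */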
1734 const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
1735 	.name = "uvd_v7_0",
1736 	.early_init = uvd_v7_0_early_init,
1737 	.late_init = NULL,
1738 	.sw_init = uvd_v7_0_sw_init,
1739 	.sw_fini = uvd_v7_0_sw_fini,
1740 	.hw_init = uvd_v7_0_hw_init,
1741 	.hw_fini = uvd_v7_0_hw_fini,
1742 	.suspend = uvd_v7_0_suspend,
1743 	.resume = uvd_v7_0_resume,
1744 	.is_idle = NULL /* uvd_v7_0_is_idle */,
1745 	.wait_for_idle = NULL /* uvd_v7_0_wait_for_idle */,
1746 	.check_soft_reset = NULL /* uvd_v7_0_check_soft_reset */,
1747 	.pre_soft_reset = NULL /* uvd_v7_0_pre_soft_reset */,
1748 	.soft_reset = NULL /* uvd_v7_0_soft_reset */,
1749 	.post_soft_reset = NULL /* uvd_v7_0_post_soft_reset */,
1750 	.set_clockgating_state = uvd_v7_0_set_clockgating_state,
1751 	.set_powergating_state = NULL /* uvd_v7_0_set_powergating_state */,
1752 };
1753 
1754 static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
1755 	.type = AMDGPU_RING_TYPE_UVD,
1756 	.align_mask = 0xf,
1757 	.support_64bit_ptrs = false,
1758 	.vmhub = AMDGPU_MMHUB,
1759 	.get_rptr = uvd_v7_0_ring_get_rptr,
1760 	.get_wptr = uvd_v7_0_ring_get_wptr,
1761 	.set_wptr = uvd_v7_0_ring_set_wptr,
1762 	.patch_cs_in_place = uvd_v7_0_ring_patch_cs_in_place,
1763 	.emit_frame_size =
1764 		6 + /* hdp invalidate */
1765 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
1766 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
1767 		8 + /* uvd_v7_0_ring_emit_vm_flush */
1768 		14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
1769 	.emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
1770 	.emit_ib = uvd_v7_0_ring_emit_ib,
1771 	.emit_fence = uvd_v7_0_ring_emit_fence,
1772 	.emit_vm_flush = uvd_v7_0_ring_emit_vm_flush,
1773 	.emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush,
1774 	.test_ring = uvd_v7_0_ring_test_ring,
1775 	.test_ib = amdgpu_uvd_ring_test_ib,
1776 	.insert_nop = uvd_v7_0_ring_insert_nop,
1777 	.pad_ib = amdgpu_ring_generic_pad_ib,
1778 	.begin_use = amdgpu_uvd_ring_begin_use,
1779 	.end_use = amdgpu_uvd_ring_end_use,
1780 	.emit_wreg = uvd_v7_0_ring_emit_wreg,
1781 	.emit_reg_wait = uvd_v7_0_ring_emit_reg_wait,
1782 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1783 };
1784 
1785 static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
1786 	.type = AMDGPU_RING_TYPE_UVD_ENC,
1787 	.align_mask = 0x3f,
1788 	.nop = HEVC_ENC_CMD_NO_OP,
1789 	.support_64bit_ptrs = false,
1790 	.vmhub = AMDGPU_MMHUB,
1791 	.get_rptr = uvd_v7_0_enc_ring_get_rptr,
1792 	.get_wptr = uvd_v7_0_enc_ring_get_wptr,
1793 	.set_wptr = uvd_v7_0_enc_ring_set_wptr,
1794 	.emit_frame_size =
1795 		3 + 3 + /* hdp flush / invalidate */
1796 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1797 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1798 		4 + /* uvd_v7_0_enc_ring_emit_vm_flush */
1799 		5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
1800 		1, /* uvd_v7_0_enc_ring_insert_end */
1801 	.emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
1802 	.emit_ib = uvd_v7_0_enc_ring_emit_ib,
1803 	.emit_fence = uvd_v7_0_enc_ring_emit_fence,
1804 	.emit_vm_flush = uvd_v7_0_enc_ring_emit_vm_flush,
1805 	.test_ring = uvd_v7_0_enc_ring_test_ring,
1806 	.test_ib = uvd_v7_0_enc_ring_test_ib,
1807 	.insert_nop = amdgpu_ring_insert_nop,
1808 	.insert_end = uvd_v7_0_enc_ring_insert_end,
1809 	.pad_ib = amdgpu_ring_generic_pad_ib,
1810 	.begin_use = amdgpu_uvd_ring_begin_use,
1811 	.end_use = amdgpu_uvd_ring_end_use,
1812 	.emit_wreg = uvd_v7_0_enc_ring_emit_wreg,
1813 	.emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait,
1814 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1815 };
1816 
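/**
 * uvd_v7_0_set_ring_funcs - set the decode ring functions
 *
 * @adev: amdgpu_device pointer
 *
 * Hook up the VM-mode decode ring callbacks for every UVD instance
 * that has not been harvested.
 */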
1817 static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
1818 {
1819 	int i;
1820 
1821 	for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
1822 		if (adev->uvd.harvest_config & (1 << i))
1823 			continue;
1824 		adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;
1825 		adev->uvd.inst[i].ring.me = i;
1826 		DRM_INFO("UVD(%d) is enabled in VM mode\n", i);
1827 	}
1828 }
1829 
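/**
 * uvd_v7_0_set_enc_ring_funcs - set the encode ring functions
 *
 * @adev: amdgpu_device pointer
 *
 * Hook up the VM-mode encode ring callbacks for every encode ring of
 * each UVD instance that has not been harvested.
 */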
1830 static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)
1831 {
1832 	int i, j;
1833 
1834 	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
1835 		if (adev->uvd.harvest_config & (1 << j))
1836 			continue;
1837 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
1838 			adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
1839 			adev->uvd.inst[j].ring_enc[i].me = j;
1840 		}
1841 
1842 		DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j);
1843 	}
1844 }
1845 
1846 static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
1847 	.set = uvd_v7_0_set_interrupt_state,
1848 	.process = uvd_v7_0_process_interrupt,
1849 };
1850 
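/**
 * uvd_v7_0_set_irq_funcs - set the interrupt functions
 *
 * @adev: amdgpu_device pointer
 *
 * Register the interrupt handlers and the number of interrupt types
 * (one per encode ring plus one for decode) for each UVD instance
 * that has not been harvested.
 */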
1851 static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
1852 {
1853 	int i;
1854 
1855 	for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
1856 		if (adev->uvd.harvest_config & (1 << i))
1857 			continue;
1858 		adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1;
1859 		adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs;
1860 	}
1861 }
1862 
1863 const struct amdgpu_ip_block_version uvd_v7_0_ip_block =
1864 {
1865 	.type = AMD_IP_BLOCK_TYPE_UVD,
1866 	.major = 7,
1867 	.minor = 0,
1868 	.rev = 0,
1869 	.funcs = &uvd_v7_0_ip_funcs,
1870 };
1871