xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c (revision 023e41632e065d49bcbe31b3c4b336217f96a271)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <linux/module.h>
29 #include <drm/drmP.h>
30 #include <drm/drm.h>
31 
32 #include "amdgpu.h"
33 #include "amdgpu_pm.h"
34 #include "amdgpu_vcn.h"
35 #include "soc15d.h"
36 #include "soc15_common.h"
37 
38 #include "vcn/vcn_1_0_offset.h"
39 #include "vcn/vcn_1_0_sh_mask.h"
40 
41 /* 1 second timeout */
42 #define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)
43 
44 /* Firmware Names */
45 #define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
46 #define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"
47 #define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin"
48 
49 MODULE_FIRMWARE(FIRMWARE_RAVEN);
50 MODULE_FIRMWARE(FIRMWARE_PICASSO);
51 MODULE_FIRMWARE(FIRMWARE_RAVEN2);
52 
53 static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
54 
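/**
 * amdgpu_vcn_sw_init - software init for the VCN block
 * @adev: amdgpu_device pointer
 *
 * Select the VCN firmware for the detected ASIC revision, request and
 * validate it, log the firmware version, and allocate the VRAM buffer
 * object that backs the VCPU (firmware image when not loaded through
 * PSP, plus stack and context space).
 */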
55 int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
56 {
57 	unsigned long bo_size;
58 	const char *fw_name;
59 	const struct common_firmware_header *hdr;
60 	unsigned char fw_check;
61 	int r;
62 
63 	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
64 
65 	switch (adev->asic_type) {
66 	case CHIP_RAVEN:
67 		if (adev->rev_id >= 8)
68 			fw_name = FIRMWARE_RAVEN2;
69 		else if (adev->pdev->device == 0x15d8)
70 			fw_name = FIRMWARE_PICASSO;
71 		else
72 			fw_name = FIRMWARE_RAVEN;
73 		break;
74 	default:
75 		return -EINVAL;
76 	}
77 
78 	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
79 	if (r) {
80 		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
81 			fw_name);
82 		return r;
83 	}
84 
85 	r = amdgpu_ucode_validate(adev->vcn.fw);
86 	if (r) {
87 		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
88 			fw_name);
89 		release_firmware(adev->vcn.fw);
90 		adev->vcn.fw = NULL;
91 		return r;
92 	}
93 
94 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
95 	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
96 
97 	/* Bits 20-23 hold the encode major version and are non-zero in the new
98 	 * naming convention. In the old convention this field is part of the
99 	 * version minor and DRM_DISABLED_FLAG. Since the latest version minor is
100 	 * 0x5B and DRM_DISABLED_FLAG is zero in the old convention, this field
101 	 * is always zero there, so these four bits tell which convention is in use.
102 	 */
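	/*
	 * Worked example (hypothetical value, for illustration only): a
	 * ucode_version of 0x01109123 decodes under the new convention as
	 * VEP 0, DEC 1, ENC 1.9 (enc_major 1, enc_minor 0x09) and
	 * firmware revision 0x123.
	 */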
103 	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
104 	if (fw_check) {
105 		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;
106 
107 		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
108 		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
109 		enc_major = fw_check;
110 		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
111 		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
112 		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
113 			enc_major, enc_minor, dec_ver, vep, fw_rev);
114 	} else {
115 		unsigned int version_major, version_minor, family_id;
116 
117 		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
118 		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
119 		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
120 		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
121 			version_major, version_minor, family_id);
122 	}
123 
124 	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
125 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
126 		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
127 	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
128 				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
129 				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
130 	if (r) {
131 		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
132 		return r;
133 	}
134 
135 	return 0;
136 }
137 
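/**
 * amdgpu_vcn_sw_fini - software fini for the VCN block
 * @adev: amdgpu_device pointer
 *
 * Free the saved firmware state, the VCPU buffer object, the decode,
 * encode and JPEG rings, and release the firmware image.
 */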
138 int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
139 {
140 	int i;
141 
142 	kvfree(adev->vcn.saved_bo);
143 
144 	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
145 			      &adev->vcn.gpu_addr,
146 			      (void **)&adev->vcn.cpu_addr);
147 
148 	amdgpu_ring_fini(&adev->vcn.ring_dec);
149 
150 	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
151 		amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
152 
153 	amdgpu_ring_fini(&adev->vcn.ring_jpeg);
154 
155 	release_firmware(adev->vcn.fw);
156 
157 	return 0;
158 }
159 
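/**
 * amdgpu_vcn_suspend - save the VCPU BO contents
 * @adev: amdgpu_device pointer
 *
 * Cancel the idle work and copy the contents of the VCPU buffer object
 * to system memory so they can be restored on resume.
 */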
160 int amdgpu_vcn_suspend(struct amdgpu_device *adev)
161 {
162 	unsigned size;
163 	void *ptr;
164 
165 	cancel_delayed_work_sync(&adev->vcn.idle_work);
166 
167 	if (adev->vcn.vcpu_bo == NULL)
168 		return 0;
169 
170 	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
171 	ptr = adev->vcn.cpu_addr;
172 
173 	adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL);
174 	if (!adev->vcn.saved_bo)
175 		return -ENOMEM;
176 
177 	memcpy_fromio(adev->vcn.saved_bo, ptr, size);
178 
179 	return 0;
180 }
181 
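/**
 * amdgpu_vcn_resume - restore the VCPU BO contents
 * @adev: amdgpu_device pointer
 *
 * Restore the saved VCPU state if it exists; otherwise re-copy the
 * firmware image (when not loaded through PSP) and clear the rest of
 * the buffer object.
 */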
182 int amdgpu_vcn_resume(struct amdgpu_device *adev)
183 {
184 	unsigned size;
185 	void *ptr;
186 
187 	if (adev->vcn.vcpu_bo == NULL)
188 		return -EINVAL;
189 
190 	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
191 	ptr = adev->vcn.cpu_addr;
192 
193 	if (adev->vcn.saved_bo != NULL) {
194 		memcpy_toio(ptr, adev->vcn.saved_bo, size);
195 		kvfree(adev->vcn.saved_bo);
196 		adev->vcn.saved_bo = NULL;
197 	} else {
198 		const struct common_firmware_header *hdr;
199 		unsigned offset;
200 
201 		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
202 		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
203 			offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
204 			memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
205 				    le32_to_cpu(hdr->ucode_size_bytes));
206 			size -= le32_to_cpu(hdr->ucode_size_bytes);
207 			ptr += le32_to_cpu(hdr->ucode_size_bytes);
208 		}
209 		memset_io(ptr, 0, size);
210 	}
211 
212 	return 0;
213 }
214 
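/**
 * amdgpu_vcn_pause_dpg_mode - pause/unpause dynamic powergating mode
 * @adev: amdgpu_device pointer
 * @new_state: requested pause state for the non-JPEG and JPEG paths
 *
 * Program the DPG pause requests when the requested state differs from
 * the current one, restoring the encode, decode and JPEG ring registers
 * after a pause has been acknowledged.
 */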
215 static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
216 				     struct dpg_pause_state *new_state)
217 {
218 	int ret_code;
219 	uint32_t reg_data = 0;
220 	uint32_t reg_data2 = 0;
221 	struct amdgpu_ring *ring;
222 
223 	/* pause/unpause if state is changed */
224 	if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
225 		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
226 			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
227 			new_state->fw_based, new_state->jpeg);
228 
229 		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
230 			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
231 
232 		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
233 			ret_code = 0;
234 
235 			if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
236 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
237 						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
238 						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
239 
240 			if (!ret_code) {
241 				/* pause DPG non-jpeg */
242 				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
243 				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
244 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
245 						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
246 						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
247 
248 				/* Restore */
249 				ring = &adev->vcn.ring_enc[0];
250 				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
251 				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
252 				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
253 				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
254 				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
255 
256 				ring = &adev->vcn.ring_enc[1];
257 				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
258 				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
259 				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
260 				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
261 				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
262 
263 				ring = &adev->vcn.ring_dec;
264 				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
265 						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
266 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
267 						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
268 						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
269 			}
270 		} else {
271 			/* unpause dpg non-jpeg, no need to wait */
272 			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
273 			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
274 		}
275 		adev->vcn.pause_state.fw_based = new_state->fw_based;
276 	}
277 
278 	/* pause/unpause if state is changed */
279 	if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
280 		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
281 			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
282 			new_state->fw_based, new_state->jpeg);
283 
284 		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
285 			(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
286 
287 		if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
288 			ret_code = 0;
289 
290 			if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
291 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
292 						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
293 						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
294 
295 			if (!ret_code) {
296 				/* Make sure JPRG Snoop is disabled before sending the pause */
297 				reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
298 				reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
299 				WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
300 
301 				/* pause DPG jpeg */
302 				reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
303 				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
304 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
305 							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
306 							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
307 
308 				/* Restore */
309 				ring = &adev->vcn.ring_jpeg;
310 				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
311 				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
312 							UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
313 							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
314 				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
315 							lower_32_bits(ring->gpu_addr));
316 				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
317 							upper_32_bits(ring->gpu_addr));
318 				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
319 				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
320 				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
321 							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
322 
323 				ring = &adev->vcn.ring_dec;
324 				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
325 						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
326 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
327 						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
328 						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
329 			}
330 		} else {
331 			/* unpause dpg jpeg, no need to wait */
332 			reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
333 			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
334 		}
335 		adev->vcn.pause_state.jpeg = new_state->jpeg;
336 	}
337 
338 	return 0;
339 }
340 
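/**
 * amdgpu_vcn_idle_work_handler - power off VCN when it goes idle
 * @work: delayed work item
 *
 * Count the fences still pending on the VCN rings, update the DPG pause
 * state accordingly, and either gate VCN power or reschedule the work
 * depending on whether any fences remain.
 */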
341 static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
342 {
343 	struct amdgpu_device *adev =
344 		container_of(work, struct amdgpu_device, vcn.idle_work.work);
345 	unsigned int fences = 0;
346 	unsigned int i;
347 
348 	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
349 		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
350 	}
351 
352 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{
353 		struct dpg_pause_state new_state;
354 
355 		if (fences)
356 			new_state.fw_based = VCN_DPG_STATE__PAUSE;
357 		else
358 			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
359 
360 		if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
361 			new_state.jpeg = VCN_DPG_STATE__PAUSE;
362 		else
363 			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
364 
365 		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
366 	}
367 
368 	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
369 	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
370 
371 	if (fences == 0) {
372 		amdgpu_gfx_off_ctrl(adev, true);
373 		if (adev->pm.dpm_enabled)
374 			amdgpu_dpm_enable_uvd(adev, false);
375 		else
376 			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
377 							       AMD_PG_STATE_GATE);
378 	} else {
379 		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
380 	}
381 }
382 
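/**
 * amdgpu_vcn_ring_begin_use - power up VCN before ring use
 * @ring: ring about to be used
 *
 * Cancel the pending idle work, ungate VCN power if it was gated, and
 * update the DPG pause state for the ring type that is about to run.
 */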
383 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
384 {
385 	struct amdgpu_device *adev = ring->adev;
386 	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
387 
388 	if (set_clocks) {
389 		amdgpu_gfx_off_ctrl(adev, false);
390 		if (adev->pm.dpm_enabled)
391 			amdgpu_dpm_enable_uvd(adev, true);
392 		else
393 			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
394 							       AMD_PG_STATE_UNGATE);
395 	}
396 
397 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{
398 		struct dpg_pause_state new_state;
399 		unsigned int fences = 0;
400 		unsigned int i;
401 
402 		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
403 			fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
404 		}
405 		if (fences)
406 			new_state.fw_based = VCN_DPG_STATE__PAUSE;
407 		else
408 			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
409 
410 		if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
411 			new_state.jpeg = VCN_DPG_STATE__PAUSE;
412 		else
413 			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
414 
415 		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
416 			new_state.fw_based = VCN_DPG_STATE__PAUSE;
417 		else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
418 			new_state.jpeg = VCN_DPG_STATE__PAUSE;
419 
420 		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
421 	}
422 }
423 
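/**
 * amdgpu_vcn_ring_end_use - schedule the idle handler after ring use
 * @ring: ring that was used
 */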
424 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
425 {
426 	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
427 }
428 
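/**
 * amdgpu_vcn_dec_ring_test_ring - register write test for the decode ring
 * @ring: ring to test
 *
 * Write a known value to a scratch register through the ring and poll
 * until the value appears or the timeout expires.
 */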
429 int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
430 {
431 	struct amdgpu_device *adev = ring->adev;
432 	uint32_t tmp = 0;
433 	unsigned i;
434 	int r;
435 
436 	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
437 	r = amdgpu_ring_alloc(ring, 3);
438 	if (r)
439 		return r;
440 
441 	amdgpu_ring_write(ring,
442 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
443 	amdgpu_ring_write(ring, 0xDEADBEEF);
444 	amdgpu_ring_commit(ring);
445 	for (i = 0; i < adev->usec_timeout; i++) {
446 		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
447 		if (tmp == 0xDEADBEEF)
448 			break;
449 		DRM_UDELAY(1);
450 	}
451 
452 	if (i >= adev->usec_timeout)
453 		r = -ETIMEDOUT;
454 
455 	return r;
456 }
457 
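/**
 * amdgpu_vcn_dec_send_msg - submit a decoder message buffer
 * @ring: decode ring
 * @bo: reserved buffer object holding the message
 * @fence: optional fence returned to the caller
 *
 * Build a small IB that points the VCPU at the message buffer and
 * submit it directly; the BO is fenced, unreserved and unreferenced
 * before returning.
 */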
458 static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
459 				   struct amdgpu_bo *bo,
460 				   struct dma_fence **fence)
461 {
462 	struct amdgpu_device *adev = ring->adev;
463 	struct dma_fence *f = NULL;
464 	struct amdgpu_job *job;
465 	struct amdgpu_ib *ib;
466 	uint64_t addr;
467 	int i, r;
468 
469 	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
470 	if (r)
471 		goto err;
472 
473 	ib = &job->ibs[0];
474 	addr = amdgpu_bo_gpu_offset(bo);
475 	ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
476 	ib->ptr[1] = addr;
477 	ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
478 	ib->ptr[3] = addr >> 32;
479 	ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
480 	ib->ptr[5] = 0;
481 	for (i = 6; i < 16; i += 2) {
482 		ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
483 		ib->ptr[i+1] = 0;
484 	}
485 	ib->length_dw = 16;
486 
487 	r = amdgpu_job_submit_direct(job, ring, &f);
488 	if (r)
489 		goto err_free;
490 
491 	amdgpu_bo_fence(bo, f, false);
492 	amdgpu_bo_unreserve(bo);
493 	amdgpu_bo_unref(&bo);
494 
495 	if (fence)
496 		*fence = dma_fence_get(f);
497 	dma_fence_put(f);
498 
499 	return 0;
500 
501 err_free:
502 	amdgpu_job_free(job);
503 
504 err:
505 	amdgpu_bo_unreserve(bo);
506 	amdgpu_bo_unref(&bo);
507 	return r;
508 }
509 
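/**
 * amdgpu_vcn_dec_get_create_msg - generate a decoder create message
 * @ring: decode ring
 * @handle: session handle to use
 * @fence: optional fence returned to the caller
 */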
510 static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
511 			      struct dma_fence **fence)
512 {
513 	struct amdgpu_device *adev = ring->adev;
514 	struct amdgpu_bo *bo = NULL;
515 	uint32_t *msg;
516 	int r, i;
517 
518 	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
519 				      AMDGPU_GEM_DOMAIN_VRAM,
520 				      &bo, NULL, (void **)&msg);
521 	if (r)
522 		return r;
523 
524 	msg[0] = cpu_to_le32(0x00000028);
525 	msg[1] = cpu_to_le32(0x00000038);
526 	msg[2] = cpu_to_le32(0x00000001);
527 	msg[3] = cpu_to_le32(0x00000000);
528 	msg[4] = cpu_to_le32(handle);
529 	msg[5] = cpu_to_le32(0x00000000);
530 	msg[6] = cpu_to_le32(0x00000001);
531 	msg[7] = cpu_to_le32(0x00000028);
532 	msg[8] = cpu_to_le32(0x00000010);
533 	msg[9] = cpu_to_le32(0x00000000);
534 	msg[10] = cpu_to_le32(0x00000007);
535 	msg[11] = cpu_to_le32(0x00000000);
536 	msg[12] = cpu_to_le32(0x00000780);
537 	msg[13] = cpu_to_le32(0x00000440);
538 	for (i = 14; i < 1024; ++i)
539 		msg[i] = cpu_to_le32(0x0);
540 
541 	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
542 }
543 
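/**
 * amdgpu_vcn_dec_get_destroy_msg - generate a decoder destroy message
 * @ring: decode ring
 * @handle: session handle to use
 * @fence: optional fence returned to the caller
 */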
544 static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
545 			       struct dma_fence **fence)
546 {
547 	struct amdgpu_device *adev = ring->adev;
548 	struct amdgpu_bo *bo = NULL;
549 	uint32_t *msg;
550 	int r, i;
551 
552 	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
553 				      AMDGPU_GEM_DOMAIN_VRAM,
554 				      &bo, NULL, (void **)&msg);
555 	if (r)
556 		return r;
557 
558 	msg[0] = cpu_to_le32(0x00000028);
559 	msg[1] = cpu_to_le32(0x00000018);
560 	msg[2] = cpu_to_le32(0x00000000);
561 	msg[3] = cpu_to_le32(0x00000002);
562 	msg[4] = cpu_to_le32(handle);
563 	msg[5] = cpu_to_le32(0x00000000);
564 	for (i = 6; i < 1024; ++i)
565 		msg[i] = cpu_to_le32(0x0);
566 
567 	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
568 }
569 
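/**
 * amdgpu_vcn_dec_ring_test_ib - indirect buffer test for the decode ring
 * @ring: ring to test
 * @timeout: how long to wait for the destroy fence, in jiffies
 *
 * Send a create and a destroy message and wait for the destroy fence
 * to signal.
 */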
570 int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
571 {
572 	struct dma_fence *fence;
573 	long r;
574 
575 	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
576 	if (r)
577 		goto error;
578 
579 	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
580 	if (r)
581 		goto error;
582 
583 	r = dma_fence_wait_timeout(fence, false, timeout);
584 	if (r == 0)
585 		r = -ETIMEDOUT;
586 	else if (r > 0)
587 		r = 0;
588 
589 	dma_fence_put(fence);
590 error:
591 	return r;
592 }
593 
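/**
 * amdgpu_vcn_enc_ring_test_ring - ring test for the encode ring
 * @ring: ring to test
 *
 * Submit an END command and poll until the read pointer advances or
 * the timeout expires.
 */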
594 int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
595 {
596 	struct amdgpu_device *adev = ring->adev;
597 	uint32_t rptr = amdgpu_ring_get_rptr(ring);
598 	unsigned i;
599 	int r;
600 
601 	r = amdgpu_ring_alloc(ring, 16);
602 	if (r)
603 		return r;
604 
605 	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
606 	amdgpu_ring_commit(ring);
607 
608 	for (i = 0; i < adev->usec_timeout; i++) {
609 		if (amdgpu_ring_get_rptr(ring) != rptr)
610 			break;
611 		DRM_UDELAY(1);
612 	}
613 
614 	if (i >= adev->usec_timeout)
615 		r = -ETIMEDOUT;
616 
617 	return r;
618 }
619 
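/**
 * amdgpu_vcn_enc_get_create_msg - generate an encoder session create command
 * @ring: encode ring
 * @handle: session handle to use
 * @fence: optional fence returned to the caller
 */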
620 static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
621 			      struct dma_fence **fence)
622 {
623 	const unsigned ib_size_dw = 16;
624 	struct amdgpu_job *job;
625 	struct amdgpu_ib *ib;
626 	struct dma_fence *f = NULL;
627 	uint64_t dummy;
628 	int i, r;
629 
630 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
631 	if (r)
632 		return r;
633 
634 	ib = &job->ibs[0];
635 	dummy = ib->gpu_addr + 1024;
636 
637 	ib->length_dw = 0;
638 	ib->ptr[ib->length_dw++] = 0x00000018;
639 	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
640 	ib->ptr[ib->length_dw++] = handle;
641 	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
642 	ib->ptr[ib->length_dw++] = dummy;
643 	ib->ptr[ib->length_dw++] = 0x0000000b;
644 
645 	ib->ptr[ib->length_dw++] = 0x00000014;
646 	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
647 	ib->ptr[ib->length_dw++] = 0x0000001c;
648 	ib->ptr[ib->length_dw++] = 0x00000000;
649 	ib->ptr[ib->length_dw++] = 0x00000000;
650 
651 	ib->ptr[ib->length_dw++] = 0x00000008;
652 	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
653 
654 	for (i = ib->length_dw; i < ib_size_dw; ++i)
655 		ib->ptr[i] = 0x0;
656 
657 	r = amdgpu_job_submit_direct(job, ring, &f);
658 	if (r)
659 		goto err;
660 
661 	if (fence)
662 		*fence = dma_fence_get(f);
663 	dma_fence_put(f);
664 
665 	return 0;
666 
667 err:
668 	amdgpu_job_free(job);
669 	return r;
670 }
671 
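/**
 * amdgpu_vcn_enc_get_destroy_msg - generate an encoder session destroy command
 * @ring: encode ring
 * @handle: session handle to use
 * @fence: optional fence returned to the caller
 */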
672 static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
673 				struct dma_fence **fence)
674 {
675 	const unsigned ib_size_dw = 16;
676 	struct amdgpu_job *job;
677 	struct amdgpu_ib *ib;
678 	struct dma_fence *f = NULL;
679 	uint64_t dummy;
680 	int i, r;
681 
682 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
683 	if (r)
684 		return r;
685 
686 	ib = &job->ibs[0];
687 	dummy = ib->gpu_addr + 1024;
688 
689 	ib->length_dw = 0;
690 	ib->ptr[ib->length_dw++] = 0x00000018;
691 	ib->ptr[ib->length_dw++] = 0x00000001;
692 	ib->ptr[ib->length_dw++] = handle;
693 	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
694 	ib->ptr[ib->length_dw++] = dummy;
695 	ib->ptr[ib->length_dw++] = 0x0000000b;
696 
697 	ib->ptr[ib->length_dw++] = 0x00000014;
698 	ib->ptr[ib->length_dw++] = 0x00000002;
699 	ib->ptr[ib->length_dw++] = 0x0000001c;
700 	ib->ptr[ib->length_dw++] = 0x00000000;
701 	ib->ptr[ib->length_dw++] = 0x00000000;
702 
703 	ib->ptr[ib->length_dw++] = 0x00000008;
704 	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
705 
706 	for (i = ib->length_dw; i < ib_size_dw; ++i)
707 		ib->ptr[i] = 0x0;
708 
709 	r = amdgpu_job_submit_direct(job, ring, &f);
710 	if (r)
711 		goto err;
712 
713 	if (fence)
714 		*fence = dma_fence_get(f);
715 	dma_fence_put(f);
716 
717 	return 0;
718 
719 err:
720 	amdgpu_job_free(job);
721 	return r;
722 }
723 
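/**
 * amdgpu_vcn_enc_ring_test_ib - indirect buffer test for the encode ring
 * @ring: ring to test
 * @timeout: how long to wait for the destroy fence, in jiffies
 */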
724 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
725 {
726 	struct dma_fence *fence = NULL;
727 	long r;
728 
729 	r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
730 	if (r)
731 		goto error;
732 
733 	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
734 	if (r)
735 		goto error;
736 
737 	r = dma_fence_wait_timeout(fence, false, timeout);
738 	if (r == 0)
739 		r = -ETIMEDOUT;
740 	else if (r > 0)
741 		r = 0;
742 
743 error:
744 	dma_fence_put(fence);
745 	return r;
746 }
747 
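/**
 * amdgpu_vcn_jpeg_ring_test_ring - register write test for the JPEG ring
 * @ring: ring to test
 */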
748 int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
749 {
750 	struct amdgpu_device *adev = ring->adev;
751 	uint32_t tmp = 0;
752 	unsigned i;
753 	int r;
754 
755 	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
756 	r = amdgpu_ring_alloc(ring, 3);
757 
758 	if (r)
759 		return r;
760 
761 	amdgpu_ring_write(ring,
762 		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
763 	amdgpu_ring_write(ring, 0xDEADBEEF);
764 	amdgpu_ring_commit(ring);
765 
766 	for (i = 0; i < adev->usec_timeout; i++) {
767 		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
768 		if (tmp == 0xDEADBEEF)
769 			break;
770 		DRM_UDELAY(1);
771 	}
772 
773 	if (i >= adev->usec_timeout)
774 		r = -ETIMEDOUT;
775 
776 	return r;
777 }
778 
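/**
 * amdgpu_vcn_jpeg_set_reg - write a scratch register through a JPEG IB
 * @ring: JPEG ring
 * @handle: session handle (unused by this helper)
 * @fence: optional fence returned to the caller
 */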
779 static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
780 		struct dma_fence **fence)
781 {
782 	struct amdgpu_device *adev = ring->adev;
783 	struct amdgpu_job *job;
784 	struct amdgpu_ib *ib;
785 	struct dma_fence *f = NULL;
786 	const unsigned ib_size_dw = 16;
787 	int i, r;
788 
789 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
790 	if (r)
791 		return r;
792 
793 	ib = &job->ibs[0];
794 
795 	ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0);
796 	ib->ptr[1] = 0xDEADBEEF;
797 	for (i = 2; i < 16; i += 2) {
798 		ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
799 		ib->ptr[i+1] = 0;
800 	}
801 	ib->length_dw = 16;
802 
803 	r = amdgpu_job_submit_direct(job, ring, &f);
804 	if (r)
805 		goto err;
806 
807 	if (fence)
808 		*fence = dma_fence_get(f);
809 	dma_fence_put(f);
810 
811 	return 0;
812 
813 err:
814 	amdgpu_job_free(job);
815 	return r;
816 }
817 
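/**
 * amdgpu_vcn_jpeg_ring_test_ib - indirect buffer test for the JPEG ring
 * @ring: ring to test
 * @timeout: how long to wait for the IB fence, in jiffies
 *
 * Submit an IB that writes a known value to a scratch register, wait
 * for its fence and then poll the register for the value.
 */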
818 int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
819 {
820 	struct amdgpu_device *adev = ring->adev;
821 	uint32_t tmp = 0;
822 	unsigned i;
823 	struct dma_fence *fence = NULL;
824 	long r = 0;
825 
826 	r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
827 	if (r)
828 		goto error;
829 
830 	r = dma_fence_wait_timeout(fence, false, timeout);
831 	if (r == 0) {
832 		r = -ETIMEDOUT;
833 		goto error;
834 	} else if (r < 0) {
835 		goto error;
836 	} else {
837 		r = 0;
838 	}
839 
840 	for (i = 0; i < adev->usec_timeout; i++) {
841 		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
842 		if (tmp == 0xDEADBEEF)
843 			break;
844 		DRM_UDELAY(1);
845 	}
846 
847 	if (i >= adev->usec_timeout)
848 		r = -ETIMEDOUT;
849 
850 	dma_fence_put(fence);
851 error:
852 	return r;
853 }
854