xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c (revision 05cf4fe738242183f1237f1b3a28b4479348c0a1)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <linux/module.h>
29 #include <drm/drmP.h>
30 #include <drm/drm.h>
31 
32 #include "amdgpu.h"
33 #include "amdgpu_pm.h"
34 #include "amdgpu_vcn.h"
35 #include "soc15d.h"
36 #include "soc15_common.h"
37 
38 #include "vcn/vcn_1_0_offset.h"
39 #include "vcn/vcn_1_0_sh_mask.h"
40 
41 /* 1 second timeout */
42 #define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)
43 
44 /* Firmware Names */
45 #define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
46 #define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"
47 #define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin"
48 
49 MODULE_FIRMWARE(FIRMWARE_RAVEN);
50 MODULE_FIRMWARE(FIRMWARE_PICASSO);
51 MODULE_FIRMWARE(FIRMWARE_RAVEN2);
52 
53 static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
54 
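/**
 * amdgpu_vcn_sw_init - VCN software init
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the idle work handler, request and validate the VCN firmware
 * for the detected ASIC, log the firmware version and allocate the
 * VCPU buffer object in VRAM.
 *
 * Returns 0 on success, negative error code on failure.
 */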
55 int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
56 {
57 	unsigned long bo_size;
58 	const char *fw_name;
59 	const struct common_firmware_header *hdr;
60 	unsigned char fw_check;
61 	int r;
62 
63 	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
64 
65 	switch (adev->asic_type) {
66 	case CHIP_RAVEN:
67 		if (adev->rev_id >= 8)
68 			fw_name = FIRMWARE_RAVEN2;
69 		else if (adev->pdev->device == 0x15d8)
70 			fw_name = FIRMWARE_PICASSO;
71 		else
72 			fw_name = FIRMWARE_RAVEN;
73 		break;
74 	default:
75 		return -EINVAL;
76 	}
77 
78 	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
79 	if (r) {
80 		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
81 			fw_name);
82 		return r;
83 	}
84 
85 	r = amdgpu_ucode_validate(adev->vcn.fw);
86 	if (r) {
87 		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
88 			fw_name);
89 		release_firmware(adev->vcn.fw);
90 		adev->vcn.fw = NULL;
91 		return r;
92 	}
93 
94 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
95 	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
96 
97 	/* Bits 20-23 hold the encode major version and are non-zero in the new
98 	 * naming convention. In the old naming convention this field is part of
99 	 * the version minor and DRM_DISABLED_FLAG. Since the latest version minor
100 	 * is 0x5B and DRM_DISABLED_FLAG is zero in the old convention, this field
101 	 * is always zero there, so these four bits tell which convention is in use.
102 	 */
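	/*
	 * Layout of hdr->ucode_version as decoded below:
	 *   new convention: [31:28] VEP, [27:24] DEC, [23:20] ENC major,
	 *                   [19:12] ENC minor, [11:0] firmware revision
	 *   old convention: [31:24] version major, [15:8] version minor,
	 *                   [7:0] family id
	 */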
103 	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
104 	if (fw_check) {
105 		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;
106 
107 		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
108 		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
109 		enc_major = fw_check;
110 		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
111 		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
112 		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
113 			enc_major, enc_minor, dec_ver, vep, fw_rev);
114 	} else {
115 		unsigned int version_major, version_minor, family_id;
116 
117 		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
118 		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
119 		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
120 		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
121 			version_major, version_minor, family_id);
122 	}
123 
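	/* VCPU BO: stack + context, plus the firmware image unless it is loaded through PSP */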
124 	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
125 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
126 		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
127 	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
128 				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
129 				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
130 	if (r) {
131 		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
132 		return r;
133 	}
134 
135 	return 0;
136 }
137 
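/**
 * amdgpu_vcn_sw_fini - VCN software fini
 *
 * @adev: amdgpu_device pointer
 *
 * Free the saved BO copy and the VCPU buffer object, tear down the
 * decode, encode and JPEG rings and release the firmware.
 */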
138 int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
139 {
140 	int i;
141 
142 	kvfree(adev->vcn.saved_bo);
143 
144 	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
145 			      &adev->vcn.gpu_addr,
146 			      (void **)&adev->vcn.cpu_addr);
147 
148 	amdgpu_ring_fini(&adev->vcn.ring_dec);
149 
150 	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
151 		amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
152 
153 	amdgpu_ring_fini(&adev->vcn.ring_jpeg);
154 
155 	release_firmware(adev->vcn.fw);
156 
157 	return 0;
158 }
159 
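/**
 * amdgpu_vcn_suspend - save VCPU BO contents
 *
 * @adev: amdgpu_device pointer
 *
 * Cancel the idle work and copy the VCPU buffer object contents to a
 * kernel allocation so they survive suspend.
 */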
160 int amdgpu_vcn_suspend(struct amdgpu_device *adev)
161 {
162 	unsigned size;
163 	void *ptr;
164 
165 	cancel_delayed_work_sync(&adev->vcn.idle_work);
166 
167 	if (adev->vcn.vcpu_bo == NULL)
168 		return 0;
169 
170 	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
171 	ptr = adev->vcn.cpu_addr;
172 
173 	adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL);
174 	if (!adev->vcn.saved_bo)
175 		return -ENOMEM;
176 
177 	memcpy_fromio(adev->vcn.saved_bo, ptr, size);
178 
179 	return 0;
180 }
181 
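/**
 * amdgpu_vcn_resume - restore VCPU BO contents
 *
 * @adev: amdgpu_device pointer
 *
 * Copy the saved contents back into the VCPU buffer object, or, if no
 * saved copy exists, reload the firmware image (when it is not loaded
 * through PSP) and clear the remainder of the BO.
 */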
182 int amdgpu_vcn_resume(struct amdgpu_device *adev)
183 {
184 	unsigned size;
185 	void *ptr;
186 
187 	if (adev->vcn.vcpu_bo == NULL)
188 		return -EINVAL;
189 
190 	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
191 	ptr = adev->vcn.cpu_addr;
192 
193 	if (adev->vcn.saved_bo != NULL) {
194 		memcpy_toio(ptr, adev->vcn.saved_bo, size);
195 		kvfree(adev->vcn.saved_bo);
196 		adev->vcn.saved_bo = NULL;
197 	} else {
198 		const struct common_firmware_header *hdr;
199 		unsigned offset;
200 
201 		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
202 		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
203 			offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
204 			memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
205 				    le32_to_cpu(hdr->ucode_size_bytes));
206 			size -= le32_to_cpu(hdr->ucode_size_bytes);
207 			ptr += le32_to_cpu(hdr->ucode_size_bytes);
208 		}
209 		memset_io(ptr, 0, size);
210 	}
211 
212 	return 0;
213 }
214 
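/**
 * amdgpu_vcn_pause_dpg_mode - pause or unpause the DPG state machines
 *
 * @adev: amdgpu_device pointer
 * @new_state: requested pause state for the non-JPEG and JPEG paths
 *
 * When the requested state differs from the current one, request a DPG
 * pause or unpause for the non-JPEG and/or JPEG paths, restoring the
 * encode, JPEG and decode ring registers after a successful pause.
 */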
215 static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
216 				     struct dpg_pause_state *new_state)
217 {
218 	int ret_code;
219 	uint32_t reg_data = 0;
220 	uint32_t reg_data2 = 0;
221 	struct amdgpu_ring *ring;
222 
223 	/* pause/unpause if state is changed */
224 	if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
225 		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
226 			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
227 			new_state->fw_based, new_state->jpeg);
228 
229 		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
230 			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
231 
232 		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
233 			ret_code = 0;
234 
235 			if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
236 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
237 						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
238 						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
239 
240 			if (!ret_code) {
241 				/* pause DPG non-jpeg */
242 				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
243 				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
244 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
245 						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
246 						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
247 
248 				/* Restore */
249 				ring = &adev->vcn.ring_enc[0];
250 				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
251 				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
252 				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
253 				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
254 				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
255 
256 				ring = &adev->vcn.ring_enc[1];
257 				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
258 				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
259 				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
260 				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
261 				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
262 
263 				ring = &adev->vcn.ring_dec;
264 				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
265 						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2));
266 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
267 						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
268 						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
269 			}
270 		} else {
271 			/* unpause dpg non-jpeg, no need to wait */
272 			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
273 			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
274 		}
275 		adev->vcn.pause_state.fw_based = new_state->fw_based;
276 	}
277 
278 	/* pause/unpause if state is changed */
279 	if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
280 		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
281 			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
282 			new_state->fw_based, new_state->jpeg);
283 
284 		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
285 			(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
286 
287 		if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
288 			ret_code = 0;
289 
290 			if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
291 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
292 						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
293 						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
294 
295 			if (!ret_code) {
296 				/* Make sure JRBC snoop is disabled before sending the pause */
297 				reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
298 				reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
299 				WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
300 
301 				/* pause DPG jpeg */
302 				reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
303 				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
304 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
305 							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
306 							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
307 
308 				/* Restore */
309 				ring = &adev->vcn.ring_jpeg;
310 				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
311 				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
312 							UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
313 							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
314 				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
315 							lower_32_bits(ring->gpu_addr));
316 				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
317 							upper_32_bits(ring->gpu_addr));
318 				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
319 				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
320 				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
321 							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
322 
323 				ring = &adev->vcn.ring_dec;
324 				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
325 						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2));
326 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
327 						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
328 						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
329 			}
330 		} else {
331 			/* unpause dpg jpeg, no need to wait */
332 			reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
333 			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
334 		}
335 		adev->vcn.pause_state.jpeg = new_state->jpeg;
336 	}
337 
338 	return 0;
339 }
340 
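/**
 * amdgpu_vcn_idle_work_handler - power gate VCN when idle
 *
 * @work: delayed work item
 *
 * Count the fences still outstanding on the VCN rings, update the DPG
 * pause state accordingly, and re-enable GFXOFF and power gate the
 * block when nothing is pending; otherwise reschedule the work.
 */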
341 static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
342 {
343 	struct amdgpu_device *adev =
344 		container_of(work, struct amdgpu_device, vcn.idle_work.work);
345 	unsigned int fences = 0;
346 	unsigned int i;
347 
348 	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
349 		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
350 	}
351 
352 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
353 		struct dpg_pause_state new_state;
354 
355 		if (fences)
356 			new_state.fw_based = VCN_DPG_STATE__PAUSE;
357 		else
358 			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
359 
360 		if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
361 			new_state.jpeg = VCN_DPG_STATE__PAUSE;
362 		else
363 			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
364 
365 		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
366 	}
367 
368 	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
369 	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
370 
371 	if (fences == 0) {
372 		amdgpu_gfx_off_ctrl(adev, true);
373 		if (adev->pm.dpm_enabled)
374 			amdgpu_dpm_enable_uvd(adev, false);
375 		else
376 			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
377 							       AMD_PG_STATE_GATE);
378 	} else {
379 		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
380 	}
381 }
382 
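/**
 * amdgpu_vcn_ring_begin_use - power up VCN before ring use
 *
 * @ring: ring about to be used
 *
 * Cancel the pending idle work and, if it was not already cancelled,
 * disable GFXOFF and ungate VCN; then request the DPG pause state that
 * matches the ring type being used.
 */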
383 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
384 {
385 	struct amdgpu_device *adev = ring->adev;
386 	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
387 
388 	if (set_clocks) {
389 		amdgpu_gfx_off_ctrl(adev, false);
390 		if (adev->pm.dpm_enabled)
391 			amdgpu_dpm_enable_uvd(adev, true);
392 		else
393 			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
394 							       AMD_PG_STATE_UNGATE);
395 	}
396 
397 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
398 		struct dpg_pause_state new_state;
399 
400 		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
401 			new_state.fw_based = VCN_DPG_STATE__PAUSE;
402 		else
403 			new_state.fw_based = adev->vcn.pause_state.fw_based;
404 
405 		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
406 			new_state.jpeg = VCN_DPG_STATE__PAUSE;
407 		else
408 			new_state.jpeg = adev->vcn.pause_state.jpeg;
409 
410 		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
411 	}
412 }
413 
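/**
 * amdgpu_vcn_ring_end_use - schedule the idle handler
 *
 * @ring: ring that is done being used
 *
 * Re-arm the delayed idle work so VCN can be power gated again once
 * the rings go idle.
 */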
414 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
415 {
416 	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
417 }
418 
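/**
 * amdgpu_vcn_dec_ring_test_ring - register write/read test on the decode ring
 *
 * @ring: decode ring to test
 *
 * Emit a register write through the ring and poll UVD_SCRATCH9 until
 * the magic value appears, to check that the ring is alive.
 *
 * Returns 0 on success, negative error code on failure.
 */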
419 int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
420 {
421 	struct amdgpu_device *adev = ring->adev;
422 	uint32_t tmp = 0;
423 	unsigned i;
424 	int r;
425 
426 	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
427 	r = amdgpu_ring_alloc(ring, 3);
428 	if (r) {
429 		DRM_ERROR("amdgpu: vcn dec failed to lock ring %d (%d).\n",
430 			  ring->idx, r);
431 		return r;
432 	}
433 	amdgpu_ring_write(ring,
434 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
435 	amdgpu_ring_write(ring, 0xDEADBEEF);
436 	amdgpu_ring_commit(ring);
437 	for (i = 0; i < adev->usec_timeout; i++) {
438 		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
439 		if (tmp == 0xDEADBEEF)
440 			break;
441 		DRM_UDELAY(1);
442 	}
443 
444 	if (i < adev->usec_timeout) {
445 		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
446 			 ring->idx, i);
447 	} else {
448 		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
449 			  ring->idx, tmp);
450 		r = -EINVAL;
451 	}
452 	return r;
453 }
454 
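/**
 * amdgpu_vcn_dec_send_msg - send a decoder message buffer to the ring
 *
 * @ring: decode ring to submit on
 * @bo: reserved buffer object containing the message
 * @fence: optional fence returned for the submission
 *
 * Build a small IB that points the VCPU at the message buffer and
 * submit it directly; the BO is fenced, unreserved and unreferenced
 * before returning.
 */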
455 static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
456 				   struct amdgpu_bo *bo,
457 				   struct dma_fence **fence)
458 {
459 	struct amdgpu_device *adev = ring->adev;
460 	struct dma_fence *f = NULL;
461 	struct amdgpu_job *job;
462 	struct amdgpu_ib *ib;
463 	uint64_t addr;
464 	int i, r;
465 
466 	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
467 	if (r)
468 		goto err;
469 
470 	ib = &job->ibs[0];
471 	addr = amdgpu_bo_gpu_offset(bo);
472 	ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
473 	ib->ptr[1] = addr;
474 	ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
475 	ib->ptr[3] = addr >> 32;
476 	ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
477 	ib->ptr[5] = 0;
478 	for (i = 6; i < 16; i += 2) {
479 		ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
480 		ib->ptr[i+1] = 0;
481 	}
482 	ib->length_dw = 16;
483 
484 	r = amdgpu_job_submit_direct(job, ring, &f);
485 	if (r)
486 		goto err_free;
487 
488 	amdgpu_bo_fence(bo, f, false);
489 	amdgpu_bo_unreserve(bo);
490 	amdgpu_bo_unref(&bo);
491 
492 	if (fence)
493 		*fence = dma_fence_get(f);
494 	dma_fence_put(f);
495 
496 	return 0;
497 
498 err_free:
499 	amdgpu_job_free(job);
500 
501 err:
502 	amdgpu_bo_unreserve(bo);
503 	amdgpu_bo_unref(&bo);
504 	return r;
505 }
506 
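/**
 * amdgpu_vcn_dec_get_create_msg - construct and send a decoder create message
 *
 * @ring: decode ring to submit on
 * @handle: session handle to use
 * @fence: optional fence returned for the submission
 *
 * Fill a buffer object with a dummy create message for the given
 * handle and hand it to amdgpu_vcn_dec_send_msg().
 */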
507 static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
508 			      struct dma_fence **fence)
509 {
510 	struct amdgpu_device *adev = ring->adev;
511 	struct amdgpu_bo *bo = NULL;
512 	uint32_t *msg;
513 	int r, i;
514 
515 	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
516 				      AMDGPU_GEM_DOMAIN_VRAM,
517 				      &bo, NULL, (void **)&msg);
518 	if (r)
519 		return r;
520 
521 	msg[0] = cpu_to_le32(0x00000028);
522 	msg[1] = cpu_to_le32(0x00000038);
523 	msg[2] = cpu_to_le32(0x00000001);
524 	msg[3] = cpu_to_le32(0x00000000);
525 	msg[4] = cpu_to_le32(handle);
526 	msg[5] = cpu_to_le32(0x00000000);
527 	msg[6] = cpu_to_le32(0x00000001);
528 	msg[7] = cpu_to_le32(0x00000028);
529 	msg[8] = cpu_to_le32(0x00000010);
530 	msg[9] = cpu_to_le32(0x00000000);
531 	msg[10] = cpu_to_le32(0x00000007);
532 	msg[11] = cpu_to_le32(0x00000000);
533 	msg[12] = cpu_to_le32(0x00000780);
534 	msg[13] = cpu_to_le32(0x00000440);
535 	for (i = 14; i < 1024; ++i)
536 		msg[i] = cpu_to_le32(0x0);
537 
538 	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
539 }
540 
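/**
 * amdgpu_vcn_dec_get_destroy_msg - construct and send a decoder destroy message
 *
 * @ring: decode ring to submit on
 * @handle: session handle to destroy
 * @fence: optional fence returned for the submission
 *
 * Fill a buffer object with a destroy message for the given handle and
 * hand it to amdgpu_vcn_dec_send_msg().
 */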
541 static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
542 			       struct dma_fence **fence)
543 {
544 	struct amdgpu_device *adev = ring->adev;
545 	struct amdgpu_bo *bo = NULL;
546 	uint32_t *msg;
547 	int r, i;
548 
549 	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
550 				      AMDGPU_GEM_DOMAIN_VRAM,
551 				      &bo, NULL, (void **)&msg);
552 	if (r)
553 		return r;
554 
555 	msg[0] = cpu_to_le32(0x00000028);
556 	msg[1] = cpu_to_le32(0x00000018);
557 	msg[2] = cpu_to_le32(0x00000000);
558 	msg[3] = cpu_to_le32(0x00000002);
559 	msg[4] = cpu_to_le32(handle);
560 	msg[5] = cpu_to_le32(0x00000000);
561 	for (i = 6; i < 1024; ++i)
562 		msg[i] = cpu_to_le32(0x0);
563 
564 	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
565 }
566 
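/**
 * amdgpu_vcn_dec_ring_test_ib - IB test on the decode ring
 *
 * @ring: decode ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Send a create message followed by a destroy message and wait for the
 * destroy fence to signal.
 *
 * Returns 0 on success, negative error code on failure.
 */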
567 int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
568 {
569 	struct dma_fence *fence;
570 	long r;
571 
572 	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
573 	if (r) {
574 		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
575 		goto error;
576 	}
577 
578 	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
579 	if (r) {
580 		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
581 		goto error;
582 	}
583 
584 	r = dma_fence_wait_timeout(fence, false, timeout);
585 	if (r == 0) {
586 		DRM_ERROR("amdgpu: IB test timed out.\n");
587 		r = -ETIMEDOUT;
588 	} else if (r < 0) {
589 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
590 	} else {
591 		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
592 		r = 0;
593 	}
594 
595 	dma_fence_put(fence);
596 
597 error:
598 	return r;
599 }
600 
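/**
 * amdgpu_vcn_enc_ring_test_ring - simple test on an encode ring
 *
 * @ring: encode ring to test
 *
 * Submit a VCN_ENC_CMD_END command and wait for the read pointer to
 * move past it.
 *
 * Returns 0 on success, negative error code on failure.
 */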
601 int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
602 {
603 	struct amdgpu_device *adev = ring->adev;
604 	uint32_t rptr = amdgpu_ring_get_rptr(ring);
605 	unsigned i;
606 	int r;
607 
608 	r = amdgpu_ring_alloc(ring, 16);
609 	if (r) {
610 		DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
611 			  ring->idx, r);
612 		return r;
613 	}
614 	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
615 	amdgpu_ring_commit(ring);
616 
617 	for (i = 0; i < adev->usec_timeout; i++) {
618 		if (amdgpu_ring_get_rptr(ring) != rptr)
619 			break;
620 		DRM_UDELAY(1);
621 	}
622 
623 	if (i < adev->usec_timeout) {
624 		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
625 			 ring->idx, i);
626 	} else {
627 		DRM_ERROR("amdgpu: ring %d test failed\n",
628 			  ring->idx);
629 		r = -ETIMEDOUT;
630 	}
631 
632 	return r;
633 }
634 
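/**
 * amdgpu_vcn_enc_get_create_msg - construct and submit a session create IB
 *
 * @ring: encode ring to submit on
 * @handle: session handle to use
 * @fence: optional fence returned for the submission
 *
 * Build a minimal session-info/task-info/initialize command stream and
 * submit it directly on the encode ring.
 */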
635 static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
636 			      struct dma_fence **fence)
637 {
638 	const unsigned ib_size_dw = 16;
639 	struct amdgpu_job *job;
640 	struct amdgpu_ib *ib;
641 	struct dma_fence *f = NULL;
642 	uint64_t dummy;
643 	int i, r;
644 
645 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
646 	if (r)
647 		return r;
648 
649 	ib = &job->ibs[0];
650 	dummy = ib->gpu_addr + 1024;
651 
652 	ib->length_dw = 0;
653 	ib->ptr[ib->length_dw++] = 0x00000018;
654 	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
655 	ib->ptr[ib->length_dw++] = handle;
656 	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
657 	ib->ptr[ib->length_dw++] = dummy;
658 	ib->ptr[ib->length_dw++] = 0x0000000b;
659 
660 	ib->ptr[ib->length_dw++] = 0x00000014;
661 	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
662 	ib->ptr[ib->length_dw++] = 0x0000001c;
663 	ib->ptr[ib->length_dw++] = 0x00000000;
664 	ib->ptr[ib->length_dw++] = 0x00000000;
665 
666 	ib->ptr[ib->length_dw++] = 0x00000008;
667 	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
668 
669 	for (i = ib->length_dw; i < ib_size_dw; ++i)
670 		ib->ptr[i] = 0x0;
671 
672 	r = amdgpu_job_submit_direct(job, ring, &f);
673 	if (r)
674 		goto err;
675 
676 	if (fence)
677 		*fence = dma_fence_get(f);
678 	dma_fence_put(f);
679 
680 	return 0;
681 
682 err:
683 	amdgpu_job_free(job);
684 	return r;
685 }
686 
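/**
 * amdgpu_vcn_enc_get_destroy_msg - construct and submit a close-session IB
 *
 * @ring: encode ring to submit on
 * @handle: session handle to close
 * @fence: optional fence returned for the submission
 *
 * Build a minimal session-info/task-info/close-session command stream
 * and submit it directly on the encode ring.
 */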
687 static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
688 				struct dma_fence **fence)
689 {
690 	const unsigned ib_size_dw = 16;
691 	struct amdgpu_job *job;
692 	struct amdgpu_ib *ib;
693 	struct dma_fence *f = NULL;
694 	uint64_t dummy;
695 	int i, r;
696 
697 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
698 	if (r)
699 		return r;
700 
701 	ib = &job->ibs[0];
702 	dummy = ib->gpu_addr + 1024;
703 
704 	ib->length_dw = 0;
705 	ib->ptr[ib->length_dw++] = 0x00000018;
706 	ib->ptr[ib->length_dw++] = 0x00000001;
707 	ib->ptr[ib->length_dw++] = handle;
708 	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
709 	ib->ptr[ib->length_dw++] = dummy;
710 	ib->ptr[ib->length_dw++] = 0x0000000b;
711 
712 	ib->ptr[ib->length_dw++] = 0x00000014;
713 	ib->ptr[ib->length_dw++] = 0x00000002;
714 	ib->ptr[ib->length_dw++] = 0x0000001c;
715 	ib->ptr[ib->length_dw++] = 0x00000000;
716 	ib->ptr[ib->length_dw++] = 0x00000000;
717 
718 	ib->ptr[ib->length_dw++] = 0x00000008;
719 	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
720 
721 	for (i = ib->length_dw; i < ib_size_dw; ++i)
722 		ib->ptr[i] = 0x0;
723 
724 	r = amdgpu_job_submit_direct(job, ring, &f);
725 	if (r)
726 		goto err;
727 
728 	if (fence)
729 		*fence = dma_fence_get(f);
730 	dma_fence_put(f);
731 
732 	return 0;
733 
734 err:
735 	amdgpu_job_free(job);
736 	return r;
737 }
738 
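/**
 * amdgpu_vcn_enc_ring_test_ib - IB test on an encode ring
 *
 * @ring: encode ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Submit a create-session IB followed by a close-session IB and wait
 * for the latter's fence to signal.
 *
 * Returns 0 on success, negative error code on failure.
 */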
739 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
740 {
741 	struct dma_fence *fence = NULL;
742 	long r;
743 
744 	r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
745 	if (r) {
746 		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
747 		goto error;
748 	}
749 
750 	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
751 	if (r) {
752 		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
753 		goto error;
754 	}
755 
756 	r = dma_fence_wait_timeout(fence, false, timeout);
757 	if (r == 0) {
758 		DRM_ERROR("amdgpu: IB test timed out.\n");
759 		r = -ETIMEDOUT;
760 	} else if (r < 0) {
761 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
762 	} else {
763 		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
764 		r = 0;
765 	}
766 error:
767 	dma_fence_put(fence);
768 	return r;
769 }
770 
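/**
 * amdgpu_vcn_jpeg_ring_test_ring - register write/read test on the JPEG ring
 *
 * @ring: JPEG ring to test
 *
 * Emit a PACKETJ register write through the ring and poll UVD_SCRATCH9
 * until the magic value appears.
 *
 * Returns 0 on success, negative error code on failure.
 */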
771 int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
772 {
773 	struct amdgpu_device *adev = ring->adev;
774 	uint32_t tmp = 0;
775 	unsigned i;
776 	int r;
777 
778 	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
779 	r = amdgpu_ring_alloc(ring, 3);
780 
781 	if (r) {
782 		DRM_ERROR("amdgpu: vcn jpeg failed to lock ring %d (%d).\n",
783 				  ring->idx, r);
784 		return r;
785 	}
786 
787 	amdgpu_ring_write(ring,
788 		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
789 	amdgpu_ring_write(ring, 0xDEADBEEF);
790 	amdgpu_ring_commit(ring);
791 
792 	for (i = 0; i < adev->usec_timeout; i++) {
793 		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
794 		if (tmp == 0xDEADBEEF)
795 			break;
796 		DRM_UDELAY(1);
797 	}
798 
799 	if (i < adev->usec_timeout) {
800 		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
801 				  ring->idx, i);
802 	} else {
803 		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
804 				  ring->idx, tmp);
805 		r = -EINVAL;
806 	}
807 
808 	return r;
809 }
810 
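/**
 * amdgpu_vcn_jpeg_set_reg - write a scratch register through the JPEG ring
 *
 * @ring: JPEG ring to submit on
 * @handle: unused
 * @fence: optional fence returned for the submission
 *
 * Submit an IB that writes a magic value to UVD_SCRATCH9 so the caller
 * can poll for it.
 */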
811 static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
812 		struct dma_fence **fence)
813 {
814 	struct amdgpu_device *adev = ring->adev;
815 	struct amdgpu_job *job;
816 	struct amdgpu_ib *ib;
817 	struct dma_fence *f = NULL;
818 	const unsigned ib_size_dw = 16;
819 	int i, r;
820 
821 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
822 	if (r)
823 		return r;
824 
825 	ib = &job->ibs[0];
826 
827 	ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0);
828 	ib->ptr[1] = 0xDEADBEEF;
829 	for (i = 2; i < 16; i += 2) {
830 		ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
831 		ib->ptr[i+1] = 0;
832 	}
833 	ib->length_dw = 16;
834 
835 	r = amdgpu_job_submit_direct(job, ring, &f);
836 	if (r)
837 		goto err;
838 
839 	if (fence)
840 		*fence = dma_fence_get(f);
841 	dma_fence_put(f);
842 
843 	return 0;
844 
845 err:
846 	amdgpu_job_free(job);
847 	return r;
848 }
849 
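/**
 * amdgpu_vcn_jpeg_ring_test_ib - IB test on the JPEG ring
 *
 * @ring: JPEG ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Submit an IB that writes UVD_SCRATCH9, wait for its fence and then
 * poll the register for the expected value.
 *
 * Returns 0 on success, negative error code on failure.
 */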
850 int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
851 {
852 	struct amdgpu_device *adev = ring->adev;
853 	uint32_t tmp = 0;
854 	unsigned i;
855 	struct dma_fence *fence = NULL;
856 	long r = 0;
857 
858 	r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
859 	if (r) {
860 		DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r);
861 		goto error;
862 	}
863 
864 	r = dma_fence_wait_timeout(fence, false, timeout);
865 	if (r == 0) {
866 		DRM_ERROR("amdgpu: IB test timed out.\n");
867 		r = -ETIMEDOUT;
868 		goto error;
869 	} else if (r < 0) {
870 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
871 		goto error;
872 	} else
873 		r = 0;
874 
875 	for (i = 0; i < adev->usec_timeout; i++) {
876 		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
877 		if (tmp == 0xDEADBEEF)
878 			break;
879 		DRM_UDELAY(1);
880 	}
881 
882 	if (i < adev->usec_timeout)
883 		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
884 	else {
885 		DRM_ERROR("ib test failed (0x%08X)\n", tmp);
886 		r = -EINVAL;
887 	}
888 
889 error:
890 	dma_fence_put(fence);
892 	return r;
893 }
894